From fde712e19cb14c3d6074b85a8f81acde38a84bd1 Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Tue, 16 Jun 2026 17:09:17 -0700 Subject: [PATCH 1/6] feat(api): add SandboxConfig CRD; move sandbox binaries off ActorTemplate Introduce a cluster-scoped SandboxConfig CRD holding the sandbox binaries (per sandbox class, per arch: url + sha256) and decouple binary selection from the ActorTemplate: - New SandboxConfig with SandboxClass (gvisor|microvm), a Default flag (cluster default per class), and Assets[arch][name]=AssetFile. - WorkerPool gains SandboxClass + SandboxConfigName (resolve by name, else the cluster default for the class). - ActorTemplate drops Runsc and the runsc/auth config types. - Regenerated CRDs, deepcopy, and clientset/listers/informers. - docs/api-guide.md: document SandboxConfig and the new WorkerPool fields. --- docs/api-guide.md | 67 +++++++-- .../generated/ate.dev_actortemplates.yaml | 53 -------- .../generated/ate.dev_sandboxconfigs.yaml | 128 ++++++++++++++++++ .../generated/ate.dev_workerpools.yaml | 19 +++ pkg/api/v1alpha1/actortemplate_types.go | 51 ------- .../v1alpha1/actortemplate_validation_test.go | 32 ----- pkg/api/v1alpha1/sandboxconfig_types.go | 116 ++++++++++++++++ pkg/api/v1alpha1/workerpool_types.go | 17 +++ pkg/api/v1alpha1/zz_generated.deepcopy.go | 93 ++++++++----- .../typed/api/v1alpha1/api_client.go | 5 + .../api/v1alpha1/fake/fake_api_client.go | 4 + .../api/v1alpha1/fake/fake_sandboxconfig.go | 50 +++++++ .../typed/api/v1alpha1/generated_expansion.go | 2 + .../typed/api/v1alpha1/sandboxconfig.go | 66 +++++++++ .../api/v1alpha1/interface.go | 7 + .../api/v1alpha1/sandboxconfig.go | 99 ++++++++++++++ .../informers/externalversions/generic.go | 2 + .../api/v1alpha1/expansion_generated.go | 4 + .../listers/api/v1alpha1/sandboxconfig.go | 46 +++++++ 19 files changed, 678 insertions(+), 183 deletions(-) create mode 100644 manifests/ate-install/generated/ate.dev_sandboxconfigs.yaml create mode 100644 
pkg/api/v1alpha1/sandboxconfig_types.go create mode 100644 pkg/client/clientset/versioned/typed/api/v1alpha1/fake/fake_sandboxconfig.go create mode 100644 pkg/client/clientset/versioned/typed/api/v1alpha1/sandboxconfig.go create mode 100644 pkg/client/informers/externalversions/api/v1alpha1/sandboxconfig.go create mode 100644 pkg/client/listers/api/v1alpha1/sandboxconfig.go diff --git a/docs/api-guide.md b/docs/api-guide.md index c1f506248..6d18eb3b3 100644 --- a/docs/api-guide.md +++ b/docs/api-guide.md @@ -12,6 +12,8 @@ The `WorkerPool` defines the pool of physical "warm" compute capacity. It manage | :--- | :--- | :--- | | `replicas` | `int32` | **Required.** Number of physical standby pods to maintain in the cluster. | | `ateomImage` | `string` | **Required.** The container image for the `ateom` herder process (e.g. `ko://github.com/agent-substrate/substrate/cmd/ateom-gvisor`). | +| `sandboxClass` | `string` | Optional. The sandbox runtime family for the pool: `gvisor` (default) or `microvm`. Drives the worker pod shape (e.g. KVM device mounts, node placement) and which `SandboxConfig`s are eligible. | +| `sandboxConfigName` | `string` | Optional. Name of a cluster-scoped [`SandboxConfig`](#3-sandboxconfig-sandbox-binaries) providing the sandbox binaries. If empty, the cluster default `SandboxConfig` for the pool's `sandboxClass` is used. | | `template` | `WorkerPoolPodTemplate` | **Optional.** Pod scheduling and resource settings for worker pods. | #### `WorkerPoolPodTemplate` (`spec.template`) @@ -34,6 +36,8 @@ metadata: spec: replicas: 10 ateomImage: ko://github.com/agent-substrate/substrate/cmd/ateom-gvisor + # sandboxClass defaults to gvisor; the pool resolves to the cluster's default + # gvisor SandboxConfig unless sandboxConfigName is set. 
``` ### Example with GPU node scheduling @@ -78,7 +82,8 @@ The `ActorTemplate` defines the code, environment, and state-management policies | `workerPoolRef` | `ObjectReference` | **Required.** Pointer to the `WorkerPool` that provides the physical pods for this template. | | `snapshotsConfig` | `SnapshotsConfig` | **Required.** GCS bucket and folder where memory snapshots are stored. | | `pauseImage` | `string` | **Required.** The image used for the sandbox root (e.g. `gcr.io/gke-release/pause`). | -| `runsc` | `RunscConfig` | **Required.** Multi-platform configuration for fetching the gVisor binary. | + +The sandbox binaries (e.g. the gVisor `runsc` binary) are **no longer configured on the `ActorTemplate`**. They are resolved from the referenced `WorkerPool`'s [`SandboxConfig`](#3-sandboxconfig-sandbox-binaries) — by name (`workerPool.spec.sandboxConfigName`) or, by default, the cluster default `SandboxConfig` for the pool's `sandboxClass`. Container environment variables support literal `value` entries and `valueFrom.secretKeyRef`. Secret references are resolved by `ate-api-server` from the `ActorTemplate` namespace when a workload spec is materialized. For the golden actor, the resolved values are captured in the golden snapshot and future actors inherit those values until the golden snapshot is recreated. For an actor that bypasses the golden snapshot and boots from the current template spec, the resolved values are sent to atelet but are not serialized into the public Actor API. Other Kubernetes `valueFrom` sources are not supported yet. Secret changes do not automatically restart actors or invalidate snapshots; rotating a Secret requires an explicit actor or template lifecycle action. @@ -101,15 +106,8 @@ metadata: name: secret-agent namespace: ate-demo spec: - runsc: - amd64: - # Note: These values are from the 2026-05-19 nightly. 
- # For the latest verified versions, see: demos/counter/counter.yaml.tmpl - url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" - sha256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" - arm64: - url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" - sha256Hash: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" + # No sandbox/runsc config here — the binaries come from the WorkerPool's + # SandboxConfig (see section 3). pauseImage: "gcr.io/gke-release/pause@sha256:bcbd57ba5653580ec647b16d8163cdd1112df3609129b01f912a8032e48265da" containers: - name: agent @@ -126,7 +124,46 @@ spec: --- -## 3. Operational Workflow +## 3. SandboxConfig: Sandbox Binaries + +`SandboxConfig` is a **cluster-scoped** resource that decouples the sandbox binaries (the gVisor `runsc` binary, or a micro-VM kernel/firmware/config) from the `ActorTemplate`. A `WorkerPool` resolves its binaries from a `SandboxConfig` — either the one named by `spec.sandboxConfigName`, or the cluster default for the pool's `sandboxClass`. + +This means a single, cluster-managed config pins the sandbox runtime version for many templates: snapshots stay restorable because the version is recorded in each snapshot's manifest, and operators upgrade the runtime in one place. + +### Specification (`SandboxConfigSpec`) + +| Field | Type | Description | +| :--- | :--- | :--- | +| `sandboxClass` | `string` | **Required** by the schema, but defaulted to `gvisor` by the API server when omitted. Runtime family this config applies to: `gvisor` or `microvm`. A `WorkerPool` only uses `SandboxConfig`s whose `sandboxClass` matches its own. | +| `default` | `bool` | Optional. Marks this as the cluster default for its `sandboxClass`. A `WorkerPool` with no `sandboxConfigName` resolves to the default for its class. At most one default per class. | +| `assets` | `map[arch]map[name]AssetFile` | Optional. Content-addressed files atelet fetches, keyed by architecture (`amd64`, `arm64`) then asset name. 
gVisor expects a `runsc` asset; a micro-VM backend expects several. Each `AssetFile` is a `{ url, sha256 }` pair. | + +A default cluster-wide gVisor `SandboxConfig` (`gvisor-default`) is installed with the platform, so gVisor pools work out of the box. + +### Example + +```yaml +apiVersion: ate.dev/v1alpha1 +kind: SandboxConfig +metadata: + name: gvisor-default +spec: + sandboxClass: gvisor + default: true + assets: + amd64: + runsc: + url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" + sha256: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" + arm64: + runsc: + url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" + sha256: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" +``` + +--- + +## 4. Operational Workflow ### The Golden Snapshot When an `ActorTemplate` is created: @@ -140,14 +177,14 @@ Once a template is `Ready`, creating an actor logically (via `kubectl-ate create --- -## 4. Best Practices +## 5. Best Practices * **Startup Logic:** Place expensive initialization (loading large models, establishing baseline connections) in your application's entry point. These will be captured in the Golden Snapshot and won't need to be repeated on every resumption. * **Symmetry:** Ensure your `ActorTemplate` and `WorkerPool` are in the same namespace or have appropriate RBAC permissions to reference each other. * **Version Management:** When updating code, create a new `ActorTemplate` (e.g. `v2`). Substrate treats each template as an immutable state root. --- -## 5. Control Plane gRPC API +## 6. Control Plane gRPC API The Substrate Control Plane (`ate-api-server`) exposes a gRPC interface for managing actors and workers. This is the primary API used by the `kubectl-ate` CLI and higher-level frameworks. @@ -192,7 +229,7 @@ Query the physical resource pool. --- -## 6. Advanced: Session Identity +## 7. 
Advanced: Session Identity Workloads can exchange their ephemeral Kubernetes credentials for stable **Session Identity** credentials that persist even as the process migrates between different physical workers. @@ -202,7 +239,7 @@ Workloads can exchange their ephemeral Kubernetes credentials for stable **Sessi --- -## 7. Framework & Ecosystem Integration +## 8. Framework & Ecosystem Integration Agent Substrate is designed to be the foundational execution layer for any agentic framework. diff --git a/manifests/ate-install/generated/ate.dev_actortemplates.yaml b/manifests/ate-install/generated/ate.dev_actortemplates.yaml index bea32893a..7aaa87727 100644 --- a/manifests/ate-install/generated/ate.dev_actortemplates.yaml +++ b/manifests/ate-install/generated/ate.dev_actortemplates.yaml @@ -167,58 +167,6 @@ spec: - message: All images must be pinned (changing the image invalidates snapshots) rule: self.contains('@') - runsc: - description: Parameters for fetching the runsc binary to use. - properties: - amd64: - description: Configuration for the amd64 binary. - properties: - sha256Hash: - description: |- - The SHA256 hash of the binary to download. Used both to name the - downloaded file (for preventing conflicts), and to check the integrity of - the downloaded file. - pattern: ^[a-z0-9]+$ - type: string - url: - description: | - A gs:// URL pointing to a runsc binary that can be downloaded (possibly - with atelet's credentials). - minLength: 1 - type: string - required: - - sha256Hash - - url - type: object - arm64: - description: Configuration for the arm64 binary. - properties: - sha256Hash: - description: |- - The SHA256 hash of the binary to download. Used both to name the - downloaded file (for preventing conflicts), and to check the integrity of - the downloaded file. - pattern: ^[a-z0-9]+$ - type: string - url: - description: | - A gs:// URL pointing to a runsc binary that can be downloaded (possibly - with atelet's credentials). 
- minLength: 1 - type: string - required: - - sha256Hash - - url - type: object - authentication: - description: How should atelet authenticate to download the runsc - binary? - properties: - gcp: - description: Use GCP application-default credentials. - type: object - type: object - type: object snapshotsConfig: description: Snapshots configuration for the actor. properties: @@ -275,7 +223,6 @@ spec: x-kubernetes-map-type: atomic required: - pauseImage - - runsc - snapshotsConfig - workerPoolRef type: object diff --git a/manifests/ate-install/generated/ate.dev_sandboxconfigs.yaml b/manifests/ate-install/generated/ate.dev_sandboxconfigs.yaml new file mode 100644 index 000000000..e6da1e977 --- /dev/null +++ b/manifests/ate-install/generated/ate.dev_sandboxconfigs.yaml @@ -0,0 +1,128 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: sandboxconfigs.ate.dev +spec: + group: ate.dev + names: + kind: SandboxConfig + listKind: SandboxConfigList + plural: sandboxconfigs + shortNames: + - sandboxconfig + singular: sandboxconfig + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .spec.sandboxClass + name: Class + type: string + - jsonPath: .spec.default + name: Default + type: boolean + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + SandboxConfig is cluster-scoped configuration describing the sandbox binaries + for a sandbox runtime family. It is referenced (or defaulted) by WorkerPools + and decouples sandbox binary selection from ActorTemplate. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of SandboxConfig + properties: + assets: + additionalProperties: + additionalProperties: + description: |- + AssetFile is one content-addressed file that atelet fetches for a sandbox + runtime (e.g. the gVisor runsc binary, or a micro-VM kernel/firmware/config). 
+ properties: + sha256: + description: |- + SHA256 is the lower-case hex SHA256 of the asset. It both names the cached + file (preventing collisions) and verifies the download's integrity. + pattern: ^[a-f0-9]{64}$ + type: string + url: + description: |- + URL is where to download the asset from (e.g. a gs:// URL). It may be + fetched anonymously or with credentials depending on atelet's + configuration. + minLength: 1 + type: string + required: + - sha256 + - url + type: object + type: object + description: |- + Assets is the set of files atelet fetches for this runtime, keyed first by + architecture (GOARCH, e.g. "amd64", "arm64") and then by asset name. The + asset names are interpreted by the sandbox backend: gVisor expects a + "runsc" asset; a micro-VM backend expects several (e.g. "cloud-hypervisor", + "kata-kernel", "kata-image"). The schema is intentionally generic; + per-class requirements are enforced by a ValidatingAdmissionPolicy. + type: object + default: + description: |- + Default marks this SandboxConfig as the cluster-wide default for its + SandboxClass. A WorkerPool with no explicit SandboxConfigName resolves to + the default config for its SandboxClass. At most one default is expected + per SandboxClass. + type: boolean + sandboxClass: + default: gvisor + description: |- + SandboxClass is the sandbox runtime family this config applies to. A + WorkerPool only uses SandboxConfigs whose SandboxClass matches its own. 
+ enum: + - gvisor + - microvm + type: string + required: + - sandboxClass + type: object + required: + - spec + type: object + served: true + storage: true + subresources: {} diff --git a/manifests/ate-install/generated/ate.dev_workerpools.yaml b/manifests/ate-install/generated/ate.dev_workerpools.yaml index 65c19137d..5714891ab 100644 --- a/manifests/ate-install/generated/ate.dev_workerpools.yaml +++ b/manifests/ate-install/generated/ate.dev_workerpools.yaml @@ -75,6 +75,25 @@ spec: format: int32 minimum: 0 type: integer + sandboxClass: + default: gvisor + description: |- + SandboxClass selects the sandbox runtime family for this pool, which drives + the worker pod shape (KVM/vhost device mounts and node placement) and which + SandboxConfigs are eligible. The concrete binary is still selected by + AteomImage. Defaults to gvisor. + enum: + - gvisor + - microvm + type: string + sandboxConfigName: + description: |- + SandboxConfigName names a cluster-scoped SandboxConfig to use for fetching + sandbox binaries. It overrides the cluster-wide default SandboxConfig for + this pool's SandboxClass. The referenced config's SandboxClass must match + this pool's SandboxClass. If empty, the default SandboxConfig for the + SandboxClass is used. + type: string template: description: Template holds optional pod scheduling and resource settings for worker pods. diff --git a/pkg/api/v1alpha1/actortemplate_types.go b/pkg/api/v1alpha1/actortemplate_types.go index 4be2fcf7f..8a61eb2e2 100644 --- a/pkg/api/v1alpha1/actortemplate_types.go +++ b/pkg/api/v1alpha1/actortemplate_types.go @@ -163,57 +163,6 @@ type ActorTemplateSpec struct { // +required // TODO: clone this type locally and add validation WorkerPoolRef corev1.ObjectReference `json:"workerPoolRef"` - - // Parameters for fetching the runsc binary to use. 
- // - // +required - Runsc RunscConfig `json:"runsc,omitempty"` -} - -type GCPAuthenticationConfig struct { -} - -// Authentication configuration for atelet to download static files. -// -// If no members are set, then atelet will use anonymous authentication. -type AuthenticationConfig struct { - // Use GCP application-default credentials. - // - // +optional - GCP *GCPAuthenticationConfig `json:"gcp,omitempty"` -} - -type RunscPlatformConfig struct { - // The SHA256 hash of the binary to download. Used both to name the - // downloaded file (for preventing conflicts), and to check the integrity of - // the downloaded file. - // - // +required - // +kubebuilder:validation:Pattern=`^[a-z0-9]+$` - SHA256Hash string `json:"sha256Hash,omitempty"` - - // A gs:// URL pointing to a runsc binary that can be downloaded (possibly - // with atelet's credentials). - // - // +required - // TODO: add real format checking - // +kubebuilder:validation:MinLength=1 - URL string `json:"url,omitempty"` -} - -type RunscConfig struct { - // Configuration for the amd64 binary. - // - // +optional - AMD64 *RunscPlatformConfig `json:"amd64,omitempty"` - - // Configuration for the arm64 binary. - // - // +optional - ARM64 *RunscPlatformConfig `json:"arm64,omitempty"` - - // How should atelet authenticate to download the runsc binary? 
- Authentication AuthenticationConfig `json:"authentication,omitempty"` } // TODO: add validation diff --git a/pkg/api/v1alpha1/actortemplate_validation_test.go b/pkg/api/v1alpha1/actortemplate_validation_test.go index d98459457..0443f7951 100644 --- a/pkg/api/v1alpha1/actortemplate_validation_test.go +++ b/pkg/api/v1alpha1/actortemplate_validation_test.go @@ -98,12 +98,6 @@ func TestActorTemplateValidation(t *testing.T) { WorkerPoolRef: corev1.ObjectReference{ Name: "test-pool", }, - Runsc: RunscConfig{ - AMD64: &RunscPlatformConfig{ - URL: "gs://bucket/runsc", - SHA256Hash: "deadbeef", - }, - }, }, } @@ -137,20 +131,6 @@ func TestActorTemplateValidation(t *testing.T) { }, wantErr: true, errMsg: "Invalid value", - }, { - name: "missing Runsc.AMD64.URL", - mutate: func(at *ActorTemplate) { - at.Spec.Runsc.AMD64.URL = "" - }, - wantErr: true, - errMsg: "Invalid value", - }, { - name: "missing Runsc.AMD64.SHA256Hash", - mutate: func(at *ActorTemplate) { - at.Spec.Runsc.AMD64.SHA256Hash = "" - }, - wantErr: true, - errMsg: "Invalid value", }, { name: "too many containers", mutate: func(at *ActorTemplate) { @@ -426,12 +406,6 @@ func TestActorTemplateSpecImmutability(t *testing.T) { WorkerPoolRef: corev1.ObjectReference{ Name: "test-pool", }, - Runsc: RunscConfig{ - AMD64: &RunscPlatformConfig{ - URL: "gs://bucket/runsc", - SHA256Hash: "deadbeef", - }, - }, }, } @@ -457,12 +431,6 @@ func TestActorTemplateSpecImmutability(t *testing.T) { at.Spec.WorkerPoolRef.Name = "new-pool" }, }, - { - name: "update-runsc-amd64-url", - mutate: func(at *ActorTemplate) { - at.Spec.Runsc.AMD64.URL = "gs://new-bucket/runsc" - }, - }, } for _, tt := range tests { diff --git a/pkg/api/v1alpha1/sandboxconfig_types.go b/pkg/api/v1alpha1/sandboxconfig_types.go new file mode 100644 index 000000000..fbaad908d --- /dev/null +++ b/pkg/api/v1alpha1/sandboxconfig_types.go @@ -0,0 +1,116 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you 
may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// SandboxClass selects the sandbox runtime family. It is shared by WorkerPool +// (which family a pool runs) and SandboxConfig (which family a config is for). +type SandboxClass string + +const ( + // SandboxClassGvisor is the gVisor/runsc runtime (cmd/ateom-gvisor). Default. + SandboxClassGvisor SandboxClass = "gvisor" + // SandboxClassMicroVM is the micro-VM runtime (cmd/ateom-microvm); needs + // /dev/kvm and vhost devices. + SandboxClassMicroVM SandboxClass = "microvm" +) + +// AssetFile is one content-addressed file that atelet fetches for a sandbox +// runtime (e.g. the gVisor runsc binary, or a micro-VM kernel/firmware/config). +type AssetFile struct { + // URL is where to download the asset from (e.g. a gs:// URL). It may be + // fetched anonymously or with credentials depending on atelet's + // configuration. + // + // +required + // +kubebuilder:validation:MinLength=1 + URL string `json:"url"` + + // SHA256 is the lower-case hex SHA256 of the asset. It both names the cached + // file (preventing collisions) and verifies the download's integrity. + // + // +required + // +kubebuilder:validation:Pattern=`^[a-f0-9]{64}$` + SHA256 string `json:"sha256"` +} + +// SandboxConfigSpec is the desired state of a SandboxConfig. +type SandboxConfigSpec struct { + // SandboxClass is the sandbox runtime family this config applies to. 
A + // WorkerPool only uses SandboxConfigs whose SandboxClass matches its own. + // + // +required + // +kubebuilder:validation:Enum=gvisor;microvm + // +kubebuilder:default=gvisor + SandboxClass SandboxClass `json:"sandboxClass"` + + // Default marks this SandboxConfig as the cluster-wide default for its + // SandboxClass. A WorkerPool with no explicit SandboxConfigName resolves to + // the default config for its SandboxClass. At most one default is expected + // per SandboxClass. + // + // +optional + Default bool `json:"default,omitempty"` + + // Assets is the set of files atelet fetches for this runtime, keyed first by + // architecture (GOARCH, e.g. "amd64", "arm64") and then by asset name. The + // asset names are interpreted by the sandbox backend: gVisor expects a + // "runsc" asset; a micro-VM backend expects several (e.g. "cloud-hypervisor", + // "kata-kernel", "kata-image"). The schema is intentionally generic; + // per-class requirements are enforced by a ValidatingAdmissionPolicy. + // + // +optional + Assets map[string]map[string]AssetFile `json:"assets,omitempty"` +} + +// SandboxConfig is cluster-scoped configuration describing the sandbox binaries +// for a sandbox runtime family. It is referenced (or defaulted) by WorkerPools +// and decouples sandbox binary selection from ActorTemplate. 
+// +// +genclient +// +genclient:nonNamespaced +// +kubebuilder:object:generate=true +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope=Cluster,shortName=sandboxconfig +// +kubebuilder:printcolumn:name="Class",type=string,JSONPath=`.spec.sandboxClass` +// +kubebuilder:printcolumn:name="Default",type=boolean,JSONPath=`.spec.default` +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` +type SandboxConfig struct { + metav1.TypeMeta `json:",inline"` + + // metadata is the standard object metadata + // +optional + metav1.ObjectMeta `json:"metadata,omitempty"` + + // spec defines the desired state of SandboxConfig + // +required + Spec SandboxConfigSpec `json:"spec"` +} + +// SandboxConfigList contains a list of SandboxConfigs. +// +kubebuilder:object:generate=true +// +kubebuilder:object:root=true +type SandboxConfigList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []SandboxConfig `json:"items"` +} + +func init() { + SchemeBuilder.Register(&SandboxConfig{}, &SandboxConfigList{}) +} diff --git a/pkg/api/v1alpha1/workerpool_types.go b/pkg/api/v1alpha1/workerpool_types.go index 2a831389a..cb0818274 100644 --- a/pkg/api/v1alpha1/workerpool_types.go +++ b/pkg/api/v1alpha1/workerpool_types.go @@ -61,6 +61,23 @@ type WorkerPoolSpec struct { // // +optional Template *WorkerPoolPodTemplate `json:"template,omitempty"` + + // SandboxClass selects the sandbox runtime family for this pool, which drives + // the worker pod shape (KVM/vhost device mounts and node placement) and which + // SandboxConfigs are eligible. The concrete binary is still selected by + // AteomImage. Defaults to gvisor. + // +optional + // +kubebuilder:validation:Enum=gvisor;microvm + // +kubebuilder:default=gvisor + SandboxClass SandboxClass `json:"sandboxClass,omitempty"` + + // SandboxConfigName names a cluster-scoped SandboxConfig to use for fetching + // sandbox binaries. 
It overrides the cluster-wide default SandboxConfig for + // this pool's SandboxClass. The referenced config's SandboxClass must match + // this pool's SandboxClass. If empty, the default SandboxConfig for the + // SandboxClass is used. + // +optional + SandboxConfigName string `json:"sandboxConfigName,omitempty"` } type WorkerPoolStatus struct { diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index 9983ce2a5..f2901beb9 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -95,7 +95,6 @@ func (in *ActorTemplateSpec) DeepCopyInto(out *ActorTemplateSpec) { } out.SnapshotsConfig = in.SnapshotsConfig out.WorkerPoolRef = in.WorkerPoolRef - in.Runsc.DeepCopyInto(&out.Runsc) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ActorTemplateSpec. @@ -132,21 +131,16 @@ func (in *ActorTemplateStatus) DeepCopy() *ActorTemplateStatus { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *AuthenticationConfig) DeepCopyInto(out *AuthenticationConfig) { +func (in *AssetFile) DeepCopyInto(out *AssetFile) { *out = *in - if in.GCP != nil { - in, out := &in.GCP, &out.GCP - *out = new(GCPAuthenticationConfig) - **out = **in - } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AuthenticationConfig. -func (in *AuthenticationConfig) DeepCopy() *AuthenticationConfig { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AssetFile. +func (in *AssetFile) DeepCopy() *AssetFile { if in == nil { return nil } - out := new(AuthenticationConfig) + out := new(AssetFile) in.DeepCopyInto(out) return out } @@ -224,57 +218,92 @@ func (in *EnvVarSource) DeepCopy() *EnvVarSource { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *GCPAuthenticationConfig) DeepCopyInto(out *GCPAuthenticationConfig) { +func (in *SandboxConfig) DeepCopyInto(out *SandboxConfig) { *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPAuthenticationConfig. -func (in *GCPAuthenticationConfig) DeepCopy() *GCPAuthenticationConfig { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SandboxConfig. +func (in *SandboxConfig) DeepCopy() *SandboxConfig { if in == nil { return nil } - out := new(GCPAuthenticationConfig) + out := new(SandboxConfig) in.DeepCopyInto(out) return out } +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *SandboxConfig) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *RunscConfig) DeepCopyInto(out *RunscConfig) { +func (in *SandboxConfigList) DeepCopyInto(out *SandboxConfigList) { *out = *in - if in.AMD64 != nil { - in, out := &in.AMD64, &out.AMD64 - *out = new(RunscPlatformConfig) - **out = **in - } - if in.ARM64 != nil { - in, out := &in.ARM64, &out.ARM64 - *out = new(RunscPlatformConfig) - **out = **in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]SandboxConfig, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } } - in.Authentication.DeepCopyInto(&out.Authentication) } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RunscConfig. -func (in *RunscConfig) DeepCopy() *RunscConfig { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SandboxConfigList. 
+func (in *SandboxConfigList) DeepCopy() *SandboxConfigList { if in == nil { return nil } - out := new(RunscConfig) + out := new(SandboxConfigList) in.DeepCopyInto(out) return out } +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *SandboxConfigList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *RunscPlatformConfig) DeepCopyInto(out *RunscPlatformConfig) { +func (in *SandboxConfigSpec) DeepCopyInto(out *SandboxConfigSpec) { *out = *in + if in.Assets != nil { + in, out := &in.Assets, &out.Assets + *out = make(map[string]map[string]AssetFile, len(*in)) + for key, val := range *in { + var outVal map[string]AssetFile + if val == nil { + (*out)[key] = nil + } else { + inVal := (*in)[key] + in, out := &inVal, &outVal + *out = make(map[string]AssetFile, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + (*out)[key] = outVal + } + } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RunscPlatformConfig. -func (in *RunscPlatformConfig) DeepCopy() *RunscPlatformConfig { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SandboxConfigSpec. 
+func (in *SandboxConfigSpec) DeepCopy() *SandboxConfigSpec { if in == nil { return nil } - out := new(RunscPlatformConfig) + out := new(SandboxConfigSpec) in.DeepCopyInto(out) return out } diff --git a/pkg/client/clientset/versioned/typed/api/v1alpha1/api_client.go b/pkg/client/clientset/versioned/typed/api/v1alpha1/api_client.go index 9f8c1ee34..399677416 100644 --- a/pkg/client/clientset/versioned/typed/api/v1alpha1/api_client.go +++ b/pkg/client/clientset/versioned/typed/api/v1alpha1/api_client.go @@ -27,6 +27,7 @@ import ( type ApiV1alpha1Interface interface { RESTClient() rest.Interface ActorTemplatesGetter + SandboxConfigsGetter WorkerPoolsGetter } @@ -39,6 +40,10 @@ func (c *ApiV1alpha1Client) ActorTemplates(namespace string) ActorTemplateInterf return newActorTemplates(c, namespace) } +func (c *ApiV1alpha1Client) SandboxConfigs() SandboxConfigInterface { + return newSandboxConfigs(c) +} + func (c *ApiV1alpha1Client) WorkerPools(namespace string) WorkerPoolInterface { return newWorkerPools(c, namespace) } diff --git a/pkg/client/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go b/pkg/client/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go index f463d8f23..7e4068d20 100644 --- a/pkg/client/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go +++ b/pkg/client/clientset/versioned/typed/api/v1alpha1/fake/fake_api_client.go @@ -30,6 +30,10 @@ func (c *FakeApiV1alpha1) ActorTemplates(namespace string) v1alpha1.ActorTemplat return newFakeActorTemplates(c, namespace) } +func (c *FakeApiV1alpha1) SandboxConfigs() v1alpha1.SandboxConfigInterface { + return newFakeSandboxConfigs(c) +} + func (c *FakeApiV1alpha1) WorkerPools(namespace string) v1alpha1.WorkerPoolInterface { return newFakeWorkerPools(c, namespace) } diff --git a/pkg/client/clientset/versioned/typed/api/v1alpha1/fake/fake_sandboxconfig.go b/pkg/client/clientset/versioned/typed/api/v1alpha1/fake/fake_sandboxconfig.go new file mode 100644 index 000000000..a6a43e374 --- 
/dev/null +++ b/pkg/client/clientset/versioned/typed/api/v1alpha1/fake/fake_sandboxconfig.go @@ -0,0 +1,50 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + v1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" + apiv1alpha1 "github.com/agent-substrate/substrate/pkg/client/clientset/versioned/typed/api/v1alpha1" + gentype "k8s.io/client-go/gentype" +) + +// fakeSandboxConfigs implements SandboxConfigInterface +type fakeSandboxConfigs struct { + *gentype.FakeClientWithList[*v1alpha1.SandboxConfig, *v1alpha1.SandboxConfigList] + Fake *FakeApiV1alpha1 +} + +func newFakeSandboxConfigs(fake *FakeApiV1alpha1) apiv1alpha1.SandboxConfigInterface { + return &fakeSandboxConfigs{ + gentype.NewFakeClientWithList[*v1alpha1.SandboxConfig, *v1alpha1.SandboxConfigList]( + fake.Fake, + "", + v1alpha1.SchemeGroupVersion.WithResource("sandboxconfigs"), + v1alpha1.SchemeGroupVersion.WithKind("SandboxConfig"), + func() *v1alpha1.SandboxConfig { return &v1alpha1.SandboxConfig{} }, + func() *v1alpha1.SandboxConfigList { return &v1alpha1.SandboxConfigList{} }, + func(dst, src *v1alpha1.SandboxConfigList) { dst.ListMeta = src.ListMeta }, + func(list *v1alpha1.SandboxConfigList) []*v1alpha1.SandboxConfig { + return gentype.ToPointerSlice(list.Items) + }, + func(list *v1alpha1.SandboxConfigList, items []*v1alpha1.SandboxConfig) { + list.Items = 
gentype.FromPointerSlice(items) + }, + ), + fake, + } +} diff --git a/pkg/client/clientset/versioned/typed/api/v1alpha1/generated_expansion.go b/pkg/client/clientset/versioned/typed/api/v1alpha1/generated_expansion.go index a253a7b30..fd8e929b6 100644 --- a/pkg/client/clientset/versioned/typed/api/v1alpha1/generated_expansion.go +++ b/pkg/client/clientset/versioned/typed/api/v1alpha1/generated_expansion.go @@ -18,4 +18,6 @@ package v1alpha1 type ActorTemplateExpansion interface{} +type SandboxConfigExpansion interface{} + type WorkerPoolExpansion interface{} diff --git a/pkg/client/clientset/versioned/typed/api/v1alpha1/sandboxconfig.go b/pkg/client/clientset/versioned/typed/api/v1alpha1/sandboxconfig.go new file mode 100644 index 000000000..45c49226d --- /dev/null +++ b/pkg/client/clientset/versioned/typed/api/v1alpha1/sandboxconfig.go @@ -0,0 +1,66 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by client-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + context "context" + + apiv1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" + scheme "github.com/agent-substrate/substrate/pkg/client/clientset/versioned/scheme" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + gentype "k8s.io/client-go/gentype" +) + +// SandboxConfigsGetter has a method to return a SandboxConfigInterface. 
+// A group's client should implement this interface. +type SandboxConfigsGetter interface { + SandboxConfigs() SandboxConfigInterface +} + +// SandboxConfigInterface has methods to work with SandboxConfig resources. +type SandboxConfigInterface interface { + Create(ctx context.Context, sandboxConfig *apiv1alpha1.SandboxConfig, opts v1.CreateOptions) (*apiv1alpha1.SandboxConfig, error) + Update(ctx context.Context, sandboxConfig *apiv1alpha1.SandboxConfig, opts v1.UpdateOptions) (*apiv1alpha1.SandboxConfig, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*apiv1alpha1.SandboxConfig, error) + List(ctx context.Context, opts v1.ListOptions) (*apiv1alpha1.SandboxConfigList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *apiv1alpha1.SandboxConfig, err error) + SandboxConfigExpansion +} + +// sandboxConfigs implements SandboxConfigInterface +type sandboxConfigs struct { + *gentype.ClientWithList[*apiv1alpha1.SandboxConfig, *apiv1alpha1.SandboxConfigList] +} + +// newSandboxConfigs returns a SandboxConfigs +func newSandboxConfigs(c *ApiV1alpha1Client) *sandboxConfigs { + return &sandboxConfigs{ + gentype.NewClientWithList[*apiv1alpha1.SandboxConfig, *apiv1alpha1.SandboxConfigList]( + "sandboxconfigs", + c.RESTClient(), + scheme.ParameterCodec, + "", + func() *apiv1alpha1.SandboxConfig { return &apiv1alpha1.SandboxConfig{} }, + func() *apiv1alpha1.SandboxConfigList { return &apiv1alpha1.SandboxConfigList{} }, + ), + } +} diff --git a/pkg/client/informers/externalversions/api/v1alpha1/interface.go b/pkg/client/informers/externalversions/api/v1alpha1/interface.go index 475dde306..790e065f1 100644 --- 
a/pkg/client/informers/externalversions/api/v1alpha1/interface.go +++ b/pkg/client/informers/externalversions/api/v1alpha1/interface.go @@ -24,6 +24,8 @@ import ( type Interface interface { // ActorTemplates returns a ActorTemplateInformer. ActorTemplates() ActorTemplateInformer + // SandboxConfigs returns a SandboxConfigInformer. + SandboxConfigs() SandboxConfigInformer // WorkerPools returns a WorkerPoolInformer. WorkerPools() WorkerPoolInformer } @@ -44,6 +46,11 @@ func (v *version) ActorTemplates() ActorTemplateInformer { return &actorTemplateInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} } +// SandboxConfigs returns a SandboxConfigInformer. +func (v *version) SandboxConfigs() SandboxConfigInformer { + return &sandboxConfigInformer{factory: v.factory, tweakListOptions: v.tweakListOptions} +} + // WorkerPools returns a WorkerPoolInformer. func (v *version) WorkerPools() WorkerPoolInformer { return &workerPoolInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} diff --git a/pkg/client/informers/externalversions/api/v1alpha1/sandboxconfig.go b/pkg/client/informers/externalversions/api/v1alpha1/sandboxconfig.go new file mode 100644 index 000000000..bcfc8eb9b --- /dev/null +++ b/pkg/client/informers/externalversions/api/v1alpha1/sandboxconfig.go @@ -0,0 +1,99 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by informer-gen. 
DO NOT EDIT. + +package v1alpha1 + +import ( + context "context" + time "time" + + pkgapiv1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" + versioned "github.com/agent-substrate/substrate/pkg/client/clientset/versioned" + internalinterfaces "github.com/agent-substrate/substrate/pkg/client/informers/externalversions/internalinterfaces" + apiv1alpha1 "github.com/agent-substrate/substrate/pkg/client/listers/api/v1alpha1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" +) + +// SandboxConfigInformer provides access to a shared informer and lister for +// SandboxConfigs. +type SandboxConfigInformer interface { + Informer() cache.SharedIndexInformer + Lister() apiv1alpha1.SandboxConfigLister +} + +type sandboxConfigInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// NewSandboxConfigInformer constructs a new informer for SandboxConfig type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewSandboxConfigInformer(client versioned.Interface, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredSandboxConfigInformer(client, resyncPeriod, indexers, nil) +} + +// NewFilteredSandboxConfigInformer constructs a new informer for SandboxConfig type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. 
+func NewFilteredSandboxConfigInformer(client versioned.Interface, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + cache.ToListWatcherWithWatchListSemantics(&cache.ListWatch{ + ListFunc: func(options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.ApiV1alpha1().SandboxConfigs().List(context.Background(), options) + }, + WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.ApiV1alpha1().SandboxConfigs().Watch(context.Background(), options) + }, + ListWithContextFunc: func(ctx context.Context, options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.ApiV1alpha1().SandboxConfigs().List(ctx, options) + }, + WatchFuncWithContext: func(ctx context.Context, options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.ApiV1alpha1().SandboxConfigs().Watch(ctx, options) + }, + }, client), + &pkgapiv1alpha1.SandboxConfig{}, + resyncPeriod, + indexers, + ) +} + +func (f *sandboxConfigInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredSandboxConfigInformer(client, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *sandboxConfigInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&pkgapiv1alpha1.SandboxConfig{}, f.defaultInformer) +} + +func (f *sandboxConfigInformer) Lister() apiv1alpha1.SandboxConfigLister { + return apiv1alpha1.NewSandboxConfigLister(f.Informer().GetIndexer()) +} diff --git a/pkg/client/informers/externalversions/generic.go 
b/pkg/client/informers/externalversions/generic.go index 7f0dd54cc..8c3b9c7a6 100644 --- a/pkg/client/informers/externalversions/generic.go +++ b/pkg/client/informers/externalversions/generic.go @@ -53,6 +53,8 @@ func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource // Group=api, Version=v1alpha1 case v1alpha1.SchemeGroupVersion.WithResource("actortemplates"): return &genericInformer{resource: resource.GroupResource(), informer: f.Api().V1alpha1().ActorTemplates().Informer()}, nil + case v1alpha1.SchemeGroupVersion.WithResource("sandboxconfigs"): + return &genericInformer{resource: resource.GroupResource(), informer: f.Api().V1alpha1().SandboxConfigs().Informer()}, nil case v1alpha1.SchemeGroupVersion.WithResource("workerpools"): return &genericInformer{resource: resource.GroupResource(), informer: f.Api().V1alpha1().WorkerPools().Informer()}, nil diff --git a/pkg/client/listers/api/v1alpha1/expansion_generated.go b/pkg/client/listers/api/v1alpha1/expansion_generated.go index 29221dba8..f10fdebdb 100644 --- a/pkg/client/listers/api/v1alpha1/expansion_generated.go +++ b/pkg/client/listers/api/v1alpha1/expansion_generated.go @@ -24,6 +24,10 @@ type ActorTemplateListerExpansion interface{} // ActorTemplateNamespaceLister. type ActorTemplateNamespaceListerExpansion interface{} +// SandboxConfigListerExpansion allows custom methods to be added to +// SandboxConfigLister. +type SandboxConfigListerExpansion interface{} + // WorkerPoolListerExpansion allows custom methods to be added to // WorkerPoolLister. 
type WorkerPoolListerExpansion interface{} diff --git a/pkg/client/listers/api/v1alpha1/sandboxconfig.go b/pkg/client/listers/api/v1alpha1/sandboxconfig.go new file mode 100644 index 000000000..a198028b9 --- /dev/null +++ b/pkg/client/listers/api/v1alpha1/sandboxconfig.go @@ -0,0 +1,46 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by lister-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + apiv1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" + labels "k8s.io/apimachinery/pkg/labels" + listers "k8s.io/client-go/listers" + cache "k8s.io/client-go/tools/cache" +) + +// SandboxConfigLister helps list SandboxConfigs. +// All objects returned here must be treated as read-only. +type SandboxConfigLister interface { + // List lists all SandboxConfigs in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*apiv1alpha1.SandboxConfig, err error) + // Get retrieves the SandboxConfig from the index for a given name. + // Objects returned here must be treated as read-only. + Get(name string) (*apiv1alpha1.SandboxConfig, error) + SandboxConfigListerExpansion +} + +// sandboxConfigLister implements the SandboxConfigLister interface. +type sandboxConfigLister struct { + listers.ResourceIndexer[*apiv1alpha1.SandboxConfig] +} + +// NewSandboxConfigLister returns a new SandboxConfigLister. 
+func NewSandboxConfigLister(indexer cache.Indexer) SandboxConfigLister { + return &sandboxConfigLister{listers.New[*apiv1alpha1.SandboxConfig](indexer, apiv1alpha1.Resource("sandboxconfig"))} +} From 536ae8fb8c00da8052fe76e4355f0dbf80310720 Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Tue, 16 Jun 2026 17:09:29 -0700 Subject: [PATCH 2/6] feat(ateletpb): replace RunscConfig with generic SandboxAssets RunRequest now carries a backend-agnostic SandboxAssets (sandbox_class + assets keyed by arch then asset name). CheckpointRequest/RestoreRequest reserve the old runsc field: checkpoint uses the version recorded on-node at Run/Restore, and restore reads it from the snapshot manifest. --- internal/proto/ateletpb/atelet.pb.go | 398 ++++++++++++--------------- internal/proto/ateletpb/atelet.proto | 52 ++-- 2 files changed, 201 insertions(+), 249 deletions(-) diff --git a/internal/proto/ateletpb/atelet.pb.go b/internal/proto/ateletpb/atelet.pb.go index a9bb734a7..03b0fe750 100644 --- a/internal/proto/ateletpb/atelet.pb.go +++ b/internal/proto/ateletpb/atelet.pb.go @@ -93,10 +93,13 @@ type RunRequest struct { ActorTemplateNamespace string `protobuf:"bytes,3,opt,name=actor_template_namespace,json=actorTemplateNamespace,proto3" json:"actor_template_namespace,omitempty"` ActorTemplateName string `protobuf:"bytes,4,opt,name=actor_template_name,json=actorTemplateName,proto3" json:"actor_template_name,omitempty"` ActorId string `protobuf:"bytes,5,opt,name=actor_id,json=actorId,proto3" json:"actor_id,omitempty"` - Runsc *RunscConfig `protobuf:"bytes,8,opt,name=runsc,proto3" json:"runsc,omitempty"` Spec *WorkloadSpec `protobuf:"bytes,7,opt,name=spec,proto3" json:"spec,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + // The sandbox binaries to use for booting this actor from scratch. 
atelet + // fetches the relevant assets and records them with the actor's on-node state + // so a later Checkpoint can pin the same version into the snapshot manifest. + SandboxAssets *SandboxAssets `protobuf:"bytes,9,opt,name=sandbox_assets,json=sandboxAssets,proto3" json:"sandbox_assets,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *RunRequest) Reset() { @@ -157,41 +160,46 @@ func (x *RunRequest) GetActorId() string { return "" } -func (x *RunRequest) GetRunsc() *RunscConfig { +func (x *RunRequest) GetSpec() *WorkloadSpec { if x != nil { - return x.Runsc + return x.Spec } return nil } -func (x *RunRequest) GetSpec() *WorkloadSpec { +func (x *RunRequest) GetSandboxAssets() *SandboxAssets { if x != nil { - return x.Spec + return x.SandboxAssets } return nil } -type GCPAuthenticationConfig struct { - state protoimpl.MessageState `protogen:"open.v1"` - Use bool `protobuf:"varint,1,opt,name=use,proto3" json:"use,omitempty"` +// AssetFile is one content-addressed file atelet fetches for a sandbox runtime +// (e.g. the gVisor runsc binary). +type AssetFile struct { + state protoimpl.MessageState `protogen:"open.v1"` + // gs:// URL to download the asset from. + Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url,omitempty"` + // Lower-case hex SHA256; names the cached file and verifies its integrity. 
+ Sha256 string `protobuf:"bytes,2,opt,name=sha256,proto3" json:"sha256,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } -func (x *GCPAuthenticationConfig) Reset() { - *x = GCPAuthenticationConfig{} +func (x *AssetFile) Reset() { + *x = AssetFile{} mi := &file_atelet_proto_msgTypes[1] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } -func (x *GCPAuthenticationConfig) String() string { +func (x *AssetFile) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GCPAuthenticationConfig) ProtoMessage() {} +func (*AssetFile) ProtoMessage() {} -func (x *GCPAuthenticationConfig) ProtoReflect() protoreflect.Message { +func (x *AssetFile) ProtoReflect() protoreflect.Message { mi := &file_atelet_proto_msgTypes[1] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) @@ -203,39 +211,48 @@ func (x *GCPAuthenticationConfig) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use GCPAuthenticationConfig.ProtoReflect.Descriptor instead. -func (*GCPAuthenticationConfig) Descriptor() ([]byte, []int) { +// Deprecated: Use AssetFile.ProtoReflect.Descriptor instead. +func (*AssetFile) Descriptor() ([]byte, []int) { return file_atelet_proto_rawDescGZIP(), []int{1} } -func (x *GCPAuthenticationConfig) GetUse() bool { +func (x *AssetFile) GetUrl() string { if x != nil { - return x.Use + return x.Url } - return false + return "" +} + +func (x *AssetFile) GetSha256() string { + if x != nil { + return x.Sha256 + } + return "" } -type AuthenticationConfig struct { - state protoimpl.MessageState `protogen:"open.v1"` - Gcp *GCPAuthenticationConfig `protobuf:"bytes,1,opt,name=gcp,proto3" json:"gcp,omitempty"` +// ArchAssets is the set of assets for a single architecture, keyed by asset +// name (a wrapper message because proto map values cannot themselves be maps). 
+type ArchAssets struct { + state protoimpl.MessageState `protogen:"open.v1"` + Files map[string]*AssetFile `protobuf:"bytes,1,rep,name=files,proto3" json:"files,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` // asset name -> file unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } -func (x *AuthenticationConfig) Reset() { - *x = AuthenticationConfig{} +func (x *ArchAssets) Reset() { + *x = ArchAssets{} mi := &file_atelet_proto_msgTypes[2] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } -func (x *AuthenticationConfig) String() string { +func (x *ArchAssets) String() string { return protoimpl.X.MessageStringOf(x) } -func (*AuthenticationConfig) ProtoMessage() {} +func (*ArchAssets) ProtoMessage() {} -func (x *AuthenticationConfig) ProtoReflect() protoreflect.Message { +func (x *ArchAssets) ProtoReflect() protoreflect.Message { mi := &file_atelet_proto_msgTypes[2] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) @@ -247,43 +264,44 @@ func (x *AuthenticationConfig) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use AuthenticationConfig.ProtoReflect.Descriptor instead. -func (*AuthenticationConfig) Descriptor() ([]byte, []int) { +// Deprecated: Use ArchAssets.ProtoReflect.Descriptor instead. +func (*ArchAssets) Descriptor() ([]byte, []int) { return file_atelet_proto_rawDescGZIP(), []int{2} } -func (x *AuthenticationConfig) GetGcp() *GCPAuthenticationConfig { +func (x *ArchAssets) GetFiles() map[string]*AssetFile { if x != nil { - return x.Gcp + return x.Files } return nil } -type RunscPlatformConfig struct { - state protoimpl.MessageState `protogen:"open.v1"` - // Check the downloaded binary against this hash. Also used for naming the - // downloaded file to prevent collisions. 
- Sha256Hash string `protobuf:"bytes,1,opt,name=sha256_hash,json=sha256Hash,proto3" json:"sha256_hash,omitempty"` - // gs:// URL to download the runsc binary. - Url string `protobuf:"bytes,2,opt,name=url,proto3" json:"url,omitempty"` +// SandboxAssets is the generic, backend-agnostic description of the sandbox +// binaries for an actor: a sandbox class plus assets keyed first by +// architecture (GOARCH) and then by asset name. atelet's backend code +// interprets the asset names (gVisor expects "runsc"). +type SandboxAssets struct { + state protoimpl.MessageState `protogen:"open.v1"` + SandboxClass string `protobuf:"bytes,1,opt,name=sandbox_class,json=sandboxClass,proto3" json:"sandbox_class,omitempty"` // e.g. "gvisor" + Assets map[string]*ArchAssets `protobuf:"bytes,2,rep,name=assets,proto3" json:"assets,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` // arch -> {name -> file} unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } -func (x *RunscPlatformConfig) Reset() { - *x = RunscPlatformConfig{} +func (x *SandboxAssets) Reset() { + *x = SandboxAssets{} mi := &file_atelet_proto_msgTypes[3] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } -func (x *RunscPlatformConfig) String() string { +func (x *SandboxAssets) String() string { return protoimpl.X.MessageStringOf(x) } -func (*RunscPlatformConfig) ProtoMessage() {} +func (*SandboxAssets) ProtoMessage() {} -func (x *RunscPlatformConfig) ProtoReflect() protoreflect.Message { +func (x *SandboxAssets) ProtoReflect() protoreflect.Message { mi := &file_atelet_proto_msgTypes[3] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) @@ -295,82 +313,21 @@ func (x *RunscPlatformConfig) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use RunscPlatformConfig.ProtoReflect.Descriptor instead. 
-func (*RunscPlatformConfig) Descriptor() ([]byte, []int) { +// Deprecated: Use SandboxAssets.ProtoReflect.Descriptor instead. +func (*SandboxAssets) Descriptor() ([]byte, []int) { return file_atelet_proto_rawDescGZIP(), []int{3} } -func (x *RunscPlatformConfig) GetSha256Hash() string { - if x != nil { - return x.Sha256Hash - } - return "" -} - -func (x *RunscPlatformConfig) GetUrl() string { +func (x *SandboxAssets) GetSandboxClass() string { if x != nil { - return x.Url + return x.SandboxClass } return "" } -type RunscConfig struct { - state protoimpl.MessageState `protogen:"open.v1"` - Amd64 *RunscPlatformConfig `protobuf:"bytes,1,opt,name=amd64,proto3" json:"amd64,omitempty"` - Arm64 *RunscPlatformConfig `protobuf:"bytes,2,opt,name=arm64,proto3" json:"arm64,omitempty"` - // How should atelet authenticate to fetch the runsc binary? - Authentication *AuthenticationConfig `protobuf:"bytes,3,opt,name=authentication,proto3" json:"authentication,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *RunscConfig) Reset() { - *x = RunscConfig{} - mi := &file_atelet_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *RunscConfig) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*RunscConfig) ProtoMessage() {} - -func (x *RunscConfig) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[4] +func (x *SandboxAssets) GetAssets() map[string]*ArchAssets { if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use RunscConfig.ProtoReflect.Descriptor instead. 
-func (*RunscConfig) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{4} -} - -func (x *RunscConfig) GetAmd64() *RunscPlatformConfig { - if x != nil { - return x.Amd64 - } - return nil -} - -func (x *RunscConfig) GetArm64() *RunscPlatformConfig { - if x != nil { - return x.Arm64 - } - return nil -} - -func (x *RunscConfig) GetAuthentication() *AuthenticationConfig { - if x != nil { - return x.Authentication + return x.Assets } return nil } @@ -386,7 +343,7 @@ type WorkloadSpec struct { func (x *WorkloadSpec) Reset() { *x = WorkloadSpec{} - mi := &file_atelet_proto_msgTypes[5] + mi := &file_atelet_proto_msgTypes[4] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -398,7 +355,7 @@ func (x *WorkloadSpec) String() string { func (*WorkloadSpec) ProtoMessage() {} func (x *WorkloadSpec) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[5] + mi := &file_atelet_proto_msgTypes[4] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -411,7 +368,7 @@ func (x *WorkloadSpec) ProtoReflect() protoreflect.Message { // Deprecated: Use WorkloadSpec.ProtoReflect.Descriptor instead. 
func (*WorkloadSpec) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{5} + return file_atelet_proto_rawDescGZIP(), []int{4} } func (x *WorkloadSpec) GetContainers() []*Container { @@ -440,7 +397,7 @@ type Container struct { func (x *Container) Reset() { *x = Container{} - mi := &file_atelet_proto_msgTypes[6] + mi := &file_atelet_proto_msgTypes[5] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -452,7 +409,7 @@ func (x *Container) String() string { func (*Container) ProtoMessage() {} func (x *Container) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[6] + mi := &file_atelet_proto_msgTypes[5] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -465,7 +422,7 @@ func (x *Container) ProtoReflect() protoreflect.Message { // Deprecated: Use Container.ProtoReflect.Descriptor instead. func (*Container) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{6} + return file_atelet_proto_rawDescGZIP(), []int{5} } func (x *Container) GetName() string { @@ -506,7 +463,7 @@ type EnvEntry struct { func (x *EnvEntry) Reset() { *x = EnvEntry{} - mi := &file_atelet_proto_msgTypes[7] + mi := &file_atelet_proto_msgTypes[6] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -518,7 +475,7 @@ func (x *EnvEntry) String() string { func (*EnvEntry) ProtoMessage() {} func (x *EnvEntry) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[7] + mi := &file_atelet_proto_msgTypes[6] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -531,7 +488,7 @@ func (x *EnvEntry) ProtoReflect() protoreflect.Message { // Deprecated: Use EnvEntry.ProtoReflect.Descriptor instead. 
func (*EnvEntry) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{7} + return file_atelet_proto_rawDescGZIP(), []int{6} } func (x *EnvEntry) GetName() string { @@ -556,7 +513,7 @@ type RunResponse struct { func (x *RunResponse) Reset() { *x = RunResponse{} - mi := &file_atelet_proto_msgTypes[8] + mi := &file_atelet_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -568,7 +525,7 @@ func (x *RunResponse) String() string { func (*RunResponse) ProtoMessage() {} func (x *RunResponse) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[8] + mi := &file_atelet_proto_msgTypes[7] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -581,7 +538,7 @@ func (x *RunResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use RunResponse.ProtoReflect.Descriptor instead. func (*RunResponse) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{8} + return file_atelet_proto_rawDescGZIP(), []int{7} } type LocalCheckpointConfiguration struct { @@ -595,7 +552,7 @@ type LocalCheckpointConfiguration struct { func (x *LocalCheckpointConfiguration) Reset() { *x = LocalCheckpointConfiguration{} - mi := &file_atelet_proto_msgTypes[9] + mi := &file_atelet_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -607,7 +564,7 @@ func (x *LocalCheckpointConfiguration) String() string { func (*LocalCheckpointConfiguration) ProtoMessage() {} func (x *LocalCheckpointConfiguration) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[9] + mi := &file_atelet_proto_msgTypes[8] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -620,7 +577,7 @@ func (x *LocalCheckpointConfiguration) ProtoReflect() protoreflect.Message { // Deprecated: Use LocalCheckpointConfiguration.ProtoReflect.Descriptor instead. 
func (*LocalCheckpointConfiguration) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{9} + return file_atelet_proto_rawDescGZIP(), []int{8} } func (x *LocalCheckpointConfiguration) GetSnapshotPrefix() string { @@ -649,7 +606,7 @@ type ExternalCheckpointConfiguration struct { func (x *ExternalCheckpointConfiguration) Reset() { *x = ExternalCheckpointConfiguration{} - mi := &file_atelet_proto_msgTypes[10] + mi := &file_atelet_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -661,7 +618,7 @@ func (x *ExternalCheckpointConfiguration) String() string { func (*ExternalCheckpointConfiguration) ProtoMessage() {} func (x *ExternalCheckpointConfiguration) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[10] + mi := &file_atelet_proto_msgTypes[9] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -674,7 +631,7 @@ func (x *ExternalCheckpointConfiguration) ProtoReflect() protoreflect.Message { // Deprecated: Use ExternalCheckpointConfiguration.ProtoReflect.Descriptor instead. 
func (*ExternalCheckpointConfiguration) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{10} + return file_atelet_proto_rawDescGZIP(), []int{9} } func (x *ExternalCheckpointConfiguration) GetSnapshotUriPrefix() string { @@ -690,7 +647,6 @@ type CheckpointRequest struct { ActorTemplateNamespace string `protobuf:"bytes,3,opt,name=actor_template_namespace,json=actorTemplateNamespace,proto3" json:"actor_template_namespace,omitempty"` ActorTemplateName string `protobuf:"bytes,4,opt,name=actor_template_name,json=actorTemplateName,proto3" json:"actor_template_name,omitempty"` ActorId string `protobuf:"bytes,5,opt,name=actor_id,json=actorId,proto3" json:"actor_id,omitempty"` - Runsc *RunscConfig `protobuf:"bytes,6,opt,name=runsc,proto3" json:"runsc,omitempty"` Spec *WorkloadSpec `protobuf:"bytes,7,opt,name=spec,proto3" json:"spec,omitempty"` Type CheckpointType `protobuf:"varint,9,opt,name=type,proto3,enum=atelet.CheckpointType" json:"type,omitempty"` // The checkpoint configuration, depending on the type. @@ -706,7 +662,7 @@ type CheckpointRequest struct { func (x *CheckpointRequest) Reset() { *x = CheckpointRequest{} - mi := &file_atelet_proto_msgTypes[11] + mi := &file_atelet_proto_msgTypes[10] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -718,7 +674,7 @@ func (x *CheckpointRequest) String() string { func (*CheckpointRequest) ProtoMessage() {} func (x *CheckpointRequest) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[11] + mi := &file_atelet_proto_msgTypes[10] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -731,7 +687,7 @@ func (x *CheckpointRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use CheckpointRequest.ProtoReflect.Descriptor instead. 
func (*CheckpointRequest) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{11} + return file_atelet_proto_rawDescGZIP(), []int{10} } func (x *CheckpointRequest) GetTargetAteomUid() string { @@ -762,13 +718,6 @@ func (x *CheckpointRequest) GetActorId() string { return "" } -func (x *CheckpointRequest) GetRunsc() *RunscConfig { - if x != nil { - return x.Runsc - } - return nil -} - func (x *CheckpointRequest) GetSpec() *WorkloadSpec { if x != nil { return x.Spec @@ -832,7 +781,7 @@ type CheckpointResponse struct { func (x *CheckpointResponse) Reset() { *x = CheckpointResponse{} - mi := &file_atelet_proto_msgTypes[12] + mi := &file_atelet_proto_msgTypes[11] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -844,7 +793,7 @@ func (x *CheckpointResponse) String() string { func (*CheckpointResponse) ProtoMessage() {} func (x *CheckpointResponse) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[12] + mi := &file_atelet_proto_msgTypes[11] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -857,7 +806,7 @@ func (x *CheckpointResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use CheckpointResponse.ProtoReflect.Descriptor instead. 
func (*CheckpointResponse) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{12} + return file_atelet_proto_rawDescGZIP(), []int{11} } type RestoreRequest struct { @@ -866,7 +815,6 @@ type RestoreRequest struct { ActorTemplateNamespace string `protobuf:"bytes,3,opt,name=actor_template_namespace,json=actorTemplateNamespace,proto3" json:"actor_template_namespace,omitempty"` ActorTemplateName string `protobuf:"bytes,4,opt,name=actor_template_name,json=actorTemplateName,proto3" json:"actor_template_name,omitempty"` ActorId string `protobuf:"bytes,5,opt,name=actor_id,json=actorId,proto3" json:"actor_id,omitempty"` - Runsc *RunscConfig `protobuf:"bytes,6,opt,name=runsc,proto3" json:"runsc,omitempty"` Spec *WorkloadSpec `protobuf:"bytes,7,opt,name=spec,proto3" json:"spec,omitempty"` Type CheckpointType `protobuf:"varint,9,opt,name=type,proto3,enum=atelet.CheckpointType" json:"type,omitempty"` // The checkpoint configuration, depending on the type. @@ -882,7 +830,7 @@ type RestoreRequest struct { func (x *RestoreRequest) Reset() { *x = RestoreRequest{} - mi := &file_atelet_proto_msgTypes[13] + mi := &file_atelet_proto_msgTypes[12] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -894,7 +842,7 @@ func (x *RestoreRequest) String() string { func (*RestoreRequest) ProtoMessage() {} func (x *RestoreRequest) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[13] + mi := &file_atelet_proto_msgTypes[12] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -907,7 +855,7 @@ func (x *RestoreRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use RestoreRequest.ProtoReflect.Descriptor instead. 
func (*RestoreRequest) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{13} + return file_atelet_proto_rawDescGZIP(), []int{12} } func (x *RestoreRequest) GetTargetAteomUid() string { @@ -938,13 +886,6 @@ func (x *RestoreRequest) GetActorId() string { return "" } -func (x *RestoreRequest) GetRunsc() *RunscConfig { - if x != nil { - return x.Runsc - } - return nil -} - func (x *RestoreRequest) GetSpec() *WorkloadSpec { if x != nil { return x.Spec @@ -1008,7 +949,7 @@ type RestoreResponse struct { func (x *RestoreResponse) Reset() { *x = RestoreResponse{} - mi := &file_atelet_proto_msgTypes[14] + mi := &file_atelet_proto_msgTypes[13] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1020,7 +961,7 @@ func (x *RestoreResponse) String() string { func (*RestoreResponse) ProtoMessage() {} func (x *RestoreResponse) ProtoReflect() protoreflect.Message { - mi := &file_atelet_proto_msgTypes[14] + mi := &file_atelet_proto_msgTypes[13] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1033,34 +974,38 @@ func (x *RestoreResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use RestoreResponse.ProtoReflect.Descriptor instead. 
func (*RestoreResponse) Descriptor() ([]byte, []int) { - return file_atelet_proto_rawDescGZIP(), []int{14} + return file_atelet_proto_rawDescGZIP(), []int{13} } var File_atelet_proto protoreflect.FileDescriptor const file_atelet_proto_rawDesc = "" + "\n" + - "\fatelet.proto\x12\x06atelet\"\x90\x02\n" + + "\fatelet.proto\x12\x06atelet\"\xa9\x02\n" + "\n" + "RunRequest\x12(\n" + "\x10target_ateom_uid\x18\x01 \x01(\tR\x0etargetAteomUid\x128\n" + "\x18actor_template_namespace\x18\x03 \x01(\tR\x16actorTemplateNamespace\x12.\n" + "\x13actor_template_name\x18\x04 \x01(\tR\x11actorTemplateName\x12\x19\n" + - "\bactor_id\x18\x05 \x01(\tR\aactorId\x12)\n" + - "\x05runsc\x18\b \x01(\v2\x13.atelet.RunscConfigR\x05runsc\x12(\n" + - "\x04spec\x18\a \x01(\v2\x14.atelet.WorkloadSpecR\x04spec\"+\n" + - "\x17GCPAuthenticationConfig\x12\x10\n" + - "\x03use\x18\x01 \x01(\bR\x03use\"I\n" + - "\x14AuthenticationConfig\x121\n" + - "\x03gcp\x18\x01 \x01(\v2\x1f.atelet.GCPAuthenticationConfigR\x03gcp\"H\n" + - "\x13RunscPlatformConfig\x12\x1f\n" + - "\vsha256_hash\x18\x01 \x01(\tR\n" + - "sha256Hash\x12\x10\n" + - "\x03url\x18\x02 \x01(\tR\x03url\"\xb9\x01\n" + - "\vRunscConfig\x121\n" + - "\x05amd64\x18\x01 \x01(\v2\x1b.atelet.RunscPlatformConfigR\x05amd64\x121\n" + - "\x05arm64\x18\x02 \x01(\v2\x1b.atelet.RunscPlatformConfigR\x05arm64\x12D\n" + - "\x0eauthentication\x18\x03 \x01(\v2\x1c.atelet.AuthenticationConfigR\x0eauthentication\"b\n" + + "\bactor_id\x18\x05 \x01(\tR\aactorId\x12(\n" + + "\x04spec\x18\a \x01(\v2\x14.atelet.WorkloadSpecR\x04spec\x12<\n" + + "\x0esandbox_assets\x18\t \x01(\v2\x15.atelet.SandboxAssetsR\rsandboxAssetsJ\x04\b\b\x10\t\"5\n" + + "\tAssetFile\x12\x10\n" + + "\x03url\x18\x01 \x01(\tR\x03url\x12\x16\n" + + "\x06sha256\x18\x02 \x01(\tR\x06sha256\"\x8e\x01\n" + + "\n" + + "ArchAssets\x123\n" + + "\x05files\x18\x01 \x03(\v2\x1d.atelet.ArchAssets.FilesEntryR\x05files\x1aK\n" + + "\n" + + "FilesEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12'\n" + + 
"\x05value\x18\x02 \x01(\v2\x11.atelet.AssetFileR\x05value:\x028\x01\"\xbe\x01\n" + + "\rSandboxAssets\x12#\n" + + "\rsandbox_class\x18\x01 \x01(\tR\fsandboxClass\x129\n" + + "\x06assets\x18\x02 \x03(\v2!.atelet.SandboxAssets.AssetsEntryR\x06assets\x1aM\n" + + "\vAssetsEntry\x12\x10\n" + + "\x03key\x18\x01 \x01(\tR\x03key\x12(\n" + + "\x05value\x18\x02 \x01(\v2\x12.atelet.ArchAssetsR\x05value:\x028\x01\"b\n" + "\fWorkloadSpec\x121\n" + "\n" + "containers\x18\x01 \x03(\v2\x11.atelet.ContainerR\n" + @@ -1079,32 +1024,30 @@ const file_atelet_proto_rawDesc = "" + "\x1cLocalCheckpointConfiguration\x12'\n" + "\x0fsnapshot_prefix\x18\x01 \x01(\tR\x0esnapshotPrefix\"Q\n" + "\x1fExternalCheckpointConfiguration\x12.\n" + - "\x13snapshot_uri_prefix\x18\x01 \x01(\tR\x11snapshotUriPrefix\"\xf2\x03\n" + + "\x13snapshot_uri_prefix\x18\x01 \x01(\tR\x11snapshotUriPrefix\"\xcd\x03\n" + "\x11CheckpointRequest\x12(\n" + "\x10target_ateom_uid\x18\x01 \x01(\tR\x0etargetAteomUid\x128\n" + "\x18actor_template_namespace\x18\x03 \x01(\tR\x16actorTemplateNamespace\x12.\n" + "\x13actor_template_name\x18\x04 \x01(\tR\x11actorTemplateName\x12\x19\n" + - "\bactor_id\x18\x05 \x01(\tR\aactorId\x12)\n" + - "\x05runsc\x18\x06 \x01(\v2\x13.atelet.RunscConfigR\x05runsc\x12(\n" + + "\bactor_id\x18\x05 \x01(\tR\aactorId\x12(\n" + "\x04spec\x18\a \x01(\v2\x14.atelet.WorkloadSpecR\x04spec\x12*\n" + "\x04type\x18\t \x01(\x0e2\x16.atelet.CheckpointTypeR\x04type\x12I\n" + "\flocal_config\x18\n" + " \x01(\v2$.atelet.LocalCheckpointConfigurationH\x00R\vlocalConfig\x12R\n" + "\x0fexternal_config\x18\v \x01(\v2'.atelet.ExternalCheckpointConfigurationH\x00R\x0eexternalConfigB\b\n" + - "\x06configJ\x04\b\b\x10\t\"\x14\n" + - "\x12CheckpointResponse\"\xef\x03\n" + + "\x06configJ\x04\b\x06\x10\aJ\x04\b\b\x10\t\"\x14\n" + + "\x12CheckpointResponse\"\xca\x03\n" + "\x0eRestoreRequest\x12(\n" + "\x10target_ateom_uid\x18\x01 \x01(\tR\x0etargetAteomUid\x128\n" + "\x18actor_template_namespace\x18\x03 
\x01(\tR\x16actorTemplateNamespace\x12.\n" + "\x13actor_template_name\x18\x04 \x01(\tR\x11actorTemplateName\x12\x19\n" + - "\bactor_id\x18\x05 \x01(\tR\aactorId\x12)\n" + - "\x05runsc\x18\x06 \x01(\v2\x13.atelet.RunscConfigR\x05runsc\x12(\n" + + "\bactor_id\x18\x05 \x01(\tR\aactorId\x12(\n" + "\x04spec\x18\a \x01(\v2\x14.atelet.WorkloadSpecR\x04spec\x12*\n" + "\x04type\x18\t \x01(\x0e2\x16.atelet.CheckpointTypeR\x04type\x12I\n" + "\flocal_config\x18\n" + " \x01(\v2$.atelet.LocalCheckpointConfigurationH\x00R\vlocalConfig\x12R\n" + "\x0fexternal_config\x18\v \x01(\v2'.atelet.ExternalCheckpointConfigurationH\x00R\x0eexternalConfigB\b\n" + - "\x06configJ\x04\b\b\x10\t\"\x11\n" + + "\x06configJ\x04\b\x06\x10\aJ\x04\b\b\x10\t\"\x11\n" + "\x0fRestoreResponse*j\n" + "\x0eCheckpointType\x12\x1f\n" + "\x1bCHECKPOINT_TYPE_UNSPECIFIED\x10\x00\x12\x19\n" + @@ -1129,55 +1072,54 @@ func file_atelet_proto_rawDescGZIP() []byte { } var file_atelet_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_atelet_proto_msgTypes = make([]protoimpl.MessageInfo, 15) +var file_atelet_proto_msgTypes = make([]protoimpl.MessageInfo, 16) var file_atelet_proto_goTypes = []any{ (CheckpointType)(0), // 0: atelet.CheckpointType (*RunRequest)(nil), // 1: atelet.RunRequest - (*GCPAuthenticationConfig)(nil), // 2: atelet.GCPAuthenticationConfig - (*AuthenticationConfig)(nil), // 3: atelet.AuthenticationConfig - (*RunscPlatformConfig)(nil), // 4: atelet.RunscPlatformConfig - (*RunscConfig)(nil), // 5: atelet.RunscConfig - (*WorkloadSpec)(nil), // 6: atelet.WorkloadSpec - (*Container)(nil), // 7: atelet.Container - (*EnvEntry)(nil), // 8: atelet.EnvEntry - (*RunResponse)(nil), // 9: atelet.RunResponse - (*LocalCheckpointConfiguration)(nil), // 10: atelet.LocalCheckpointConfiguration - (*ExternalCheckpointConfiguration)(nil), // 11: atelet.ExternalCheckpointConfiguration - (*CheckpointRequest)(nil), // 12: atelet.CheckpointRequest - (*CheckpointResponse)(nil), // 13: atelet.CheckpointResponse - 
(*RestoreRequest)(nil), // 14: atelet.RestoreRequest - (*RestoreResponse)(nil), // 15: atelet.RestoreResponse + (*AssetFile)(nil), // 2: atelet.AssetFile + (*ArchAssets)(nil), // 3: atelet.ArchAssets + (*SandboxAssets)(nil), // 4: atelet.SandboxAssets + (*WorkloadSpec)(nil), // 5: atelet.WorkloadSpec + (*Container)(nil), // 6: atelet.Container + (*EnvEntry)(nil), // 7: atelet.EnvEntry + (*RunResponse)(nil), // 8: atelet.RunResponse + (*LocalCheckpointConfiguration)(nil), // 9: atelet.LocalCheckpointConfiguration + (*ExternalCheckpointConfiguration)(nil), // 10: atelet.ExternalCheckpointConfiguration + (*CheckpointRequest)(nil), // 11: atelet.CheckpointRequest + (*CheckpointResponse)(nil), // 12: atelet.CheckpointResponse + (*RestoreRequest)(nil), // 13: atelet.RestoreRequest + (*RestoreResponse)(nil), // 14: atelet.RestoreResponse + nil, // 15: atelet.ArchAssets.FilesEntry + nil, // 16: atelet.SandboxAssets.AssetsEntry } var file_atelet_proto_depIdxs = []int32{ - 5, // 0: atelet.RunRequest.runsc:type_name -> atelet.RunscConfig - 6, // 1: atelet.RunRequest.spec:type_name -> atelet.WorkloadSpec - 2, // 2: atelet.AuthenticationConfig.gcp:type_name -> atelet.GCPAuthenticationConfig - 4, // 3: atelet.RunscConfig.amd64:type_name -> atelet.RunscPlatformConfig - 4, // 4: atelet.RunscConfig.arm64:type_name -> atelet.RunscPlatformConfig - 3, // 5: atelet.RunscConfig.authentication:type_name -> atelet.AuthenticationConfig - 7, // 6: atelet.WorkloadSpec.containers:type_name -> atelet.Container - 8, // 7: atelet.Container.env:type_name -> atelet.EnvEntry - 5, // 8: atelet.CheckpointRequest.runsc:type_name -> atelet.RunscConfig - 6, // 9: atelet.CheckpointRequest.spec:type_name -> atelet.WorkloadSpec - 0, // 10: atelet.CheckpointRequest.type:type_name -> atelet.CheckpointType - 10, // 11: atelet.CheckpointRequest.local_config:type_name -> atelet.LocalCheckpointConfiguration - 11, // 12: atelet.CheckpointRequest.external_config:type_name -> atelet.ExternalCheckpointConfiguration 
- 5, // 13: atelet.RestoreRequest.runsc:type_name -> atelet.RunscConfig - 6, // 14: atelet.RestoreRequest.spec:type_name -> atelet.WorkloadSpec - 0, // 15: atelet.RestoreRequest.type:type_name -> atelet.CheckpointType - 10, // 16: atelet.RestoreRequest.local_config:type_name -> atelet.LocalCheckpointConfiguration - 11, // 17: atelet.RestoreRequest.external_config:type_name -> atelet.ExternalCheckpointConfiguration - 1, // 18: atelet.AteomHerder.Run:input_type -> atelet.RunRequest - 12, // 19: atelet.AteomHerder.Checkpoint:input_type -> atelet.CheckpointRequest - 14, // 20: atelet.AteomHerder.Restore:input_type -> atelet.RestoreRequest - 9, // 21: atelet.AteomHerder.Run:output_type -> atelet.RunResponse - 13, // 22: atelet.AteomHerder.Checkpoint:output_type -> atelet.CheckpointResponse - 15, // 23: atelet.AteomHerder.Restore:output_type -> atelet.RestoreResponse - 21, // [21:24] is the sub-list for method output_type - 18, // [18:21] is the sub-list for method input_type - 18, // [18:18] is the sub-list for extension type_name - 18, // [18:18] is the sub-list for extension extendee - 0, // [0:18] is the sub-list for field type_name + 5, // 0: atelet.RunRequest.spec:type_name -> atelet.WorkloadSpec + 4, // 1: atelet.RunRequest.sandbox_assets:type_name -> atelet.SandboxAssets + 15, // 2: atelet.ArchAssets.files:type_name -> atelet.ArchAssets.FilesEntry + 16, // 3: atelet.SandboxAssets.assets:type_name -> atelet.SandboxAssets.AssetsEntry + 6, // 4: atelet.WorkloadSpec.containers:type_name -> atelet.Container + 7, // 5: atelet.Container.env:type_name -> atelet.EnvEntry + 5, // 6: atelet.CheckpointRequest.spec:type_name -> atelet.WorkloadSpec + 0, // 7: atelet.CheckpointRequest.type:type_name -> atelet.CheckpointType + 9, // 8: atelet.CheckpointRequest.local_config:type_name -> atelet.LocalCheckpointConfiguration + 10, // 9: atelet.CheckpointRequest.external_config:type_name -> atelet.ExternalCheckpointConfiguration + 5, // 10: atelet.RestoreRequest.spec:type_name -> 
atelet.WorkloadSpec + 0, // 11: atelet.RestoreRequest.type:type_name -> atelet.CheckpointType + 9, // 12: atelet.RestoreRequest.local_config:type_name -> atelet.LocalCheckpointConfiguration + 10, // 13: atelet.RestoreRequest.external_config:type_name -> atelet.ExternalCheckpointConfiguration + 2, // 14: atelet.ArchAssets.FilesEntry.value:type_name -> atelet.AssetFile + 3, // 15: atelet.SandboxAssets.AssetsEntry.value:type_name -> atelet.ArchAssets + 1, // 16: atelet.AteomHerder.Run:input_type -> atelet.RunRequest + 11, // 17: atelet.AteomHerder.Checkpoint:input_type -> atelet.CheckpointRequest + 13, // 18: atelet.AteomHerder.Restore:input_type -> atelet.RestoreRequest + 8, // 19: atelet.AteomHerder.Run:output_type -> atelet.RunResponse + 12, // 20: atelet.AteomHerder.Checkpoint:output_type -> atelet.CheckpointResponse + 14, // 21: atelet.AteomHerder.Restore:output_type -> atelet.RestoreResponse + 19, // [19:22] is the sub-list for method output_type + 16, // [16:19] is the sub-list for method input_type + 16, // [16:16] is the sub-list for extension type_name + 16, // [16:16] is the sub-list for extension extendee + 0, // [0:16] is the sub-list for field type_name } func init() { file_atelet_proto_init() } @@ -1185,11 +1127,11 @@ func file_atelet_proto_init() { if File_atelet_proto != nil { return } - file_atelet_proto_msgTypes[11].OneofWrappers = []any{ + file_atelet_proto_msgTypes[10].OneofWrappers = []any{ (*CheckpointRequest_LocalConfig)(nil), (*CheckpointRequest_ExternalConfig)(nil), } - file_atelet_proto_msgTypes[13].OneofWrappers = []any{ + file_atelet_proto_msgTypes[12].OneofWrappers = []any{ (*RestoreRequest_LocalConfig)(nil), (*RestoreRequest_ExternalConfig)(nil), } @@ -1199,7 +1141,7 @@ func file_atelet_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_atelet_proto_rawDesc), len(file_atelet_proto_rawDesc)), NumEnums: 1, - NumMessages: 15, + NumMessages: 16, NumExtensions: 0, NumServices: 1, }, 
diff --git a/internal/proto/ateletpb/atelet.proto b/internal/proto/ateletpb/atelet.proto index acaf7a730..d210656d6 100644 --- a/internal/proto/ateletpb/atelet.proto +++ b/internal/proto/ateletpb/atelet.proto @@ -39,34 +39,38 @@ message RunRequest { string actor_template_name = 4; string actor_id = 5; - RunscConfig runsc = 8; + reserved 8; // was RunscConfig runsc WorkloadSpec spec = 7; -} -message GCPAuthenticationConfig { - bool use = 1; + // The sandbox binaries to use for booting this actor from scratch. atelet + // fetches the relevant assets and records them with the actor's on-node state + // so a later Checkpoint can pin the same version into the snapshot manifest. + SandboxAssets sandbox_assets = 9; } -message AuthenticationConfig { - GCPAuthenticationConfig gcp = 1; +// AssetFile is one content-addressed file atelet fetches for a sandbox runtime +// (e.g. the gVisor runsc binary). +message AssetFile { + // gs:// URL to download the asset from. + string url = 1; + // Lower-case hex SHA256; names the cached file and verifies its integrity. + string sha256 = 2; } -message RunscPlatformConfig { - // Check the downloaded binary against this hash. Also used for naming the - // downloaded file to prevent collisions. - string sha256_hash = 1; - - // gs:// URL to download the runsc binary. - string url = 2; +// ArchAssets is the set of assets for a single architecture, keyed by asset +// name (a wrapper message because proto map values cannot themselves be maps). +message ArchAssets { + map<string, AssetFile> files = 1; // asset name -> file } -message RunscConfig { - RunscPlatformConfig amd64 = 1; - RunscPlatformConfig arm64 = 2; - - // How should atelet authenticate to fetch the runsc binary? - AuthenticationConfig authentication = 3; +// SandboxAssets is the generic, backend-agnostic description of the sandbox +// binaries for an actor: a sandbox class plus assets keyed first by +// architecture (GOARCH) and then by asset name.
atelet's backend code +// interprets the asset names (gVisor expects "runsc"). +message SandboxAssets { + string sandbox_class = 1; // e.g. "gvisor" + map<string, ArchAssets> assets = 2; // arch -> {name -> file} } // WorkloadSpec parallels Pod, but with far fewer configurable fields. @@ -126,7 +130,10 @@ message CheckpointRequest { string actor_template_name = 4; string actor_id = 5; - RunscConfig runsc = 6; + // Sandbox binary config is not sent on checkpoint: atelet uses the version the + // actor is currently running (recorded with the actor's on-node state at + // Run/Restore) and records it into the snapshot manifest. + reserved 6; // was RunscConfig runsc WorkloadSpec spec = 7; @@ -153,7 +160,10 @@ message RestoreRequest { string actor_template_name = 4; string actor_id = 5; - RunscConfig runsc = 6; + // Sandbox binary config is not sent on restore: the snapshot is + // self-describing. atelet reads the snapshot manifest to recover the pinned + // sandbox version that created it. + reserved 6; // was RunscConfig runsc WorkloadSpec spec = 7; From 8d1b7ea5236a0c99110772067142a2e6c38983ef Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Tue, 16 Jun 2026 17:09:29 -0700 Subject: [PATCH 3/6] feat(atelet): fetch sandbox assets generically + snapshot manifest - Fetch sandbox binaries from RunRequest.SandboxAssets (content-addressed, cached); for gVisor this is the "runsc" asset passed to ateom as RunscPath. - Record the running version in per-actor on-node state at Run/Restore so a later Checkpoint can re-fetch and pin it (ateompath.ActorSandboxAssetsFile). - Write a snapshot manifest (manifest.json) beside the checkpoint images at Checkpoint and read it at Restore, so restore is self-describing across nodes. Adds ategcs.SendBytesToGCS for the manifest upload.
--- cmd/atelet/internal/ategcs/objects.go | 17 ++ cmd/atelet/main.go | 215 ++++++++++++-------------- cmd/atelet/main_test.go | 16 +- cmd/atelet/sandbox_assets.go | 191 +++++++++++++++++++++++ internal/ateompath/ateompath.go | 13 ++ 5 files changed, 324 insertions(+), 128 deletions(-) create mode 100644 cmd/atelet/sandbox_assets.go diff --git a/cmd/atelet/internal/ategcs/objects.go b/cmd/atelet/internal/ategcs/objects.go index 5b28ec4c2..0a9c40f51 100644 --- a/cmd/atelet/internal/ategcs/objects.go +++ b/cmd/atelet/internal/ategcs/objects.go @@ -15,6 +15,7 @@ package ategcs import ( + "bytes" "context" "fmt" "io" @@ -57,6 +58,22 @@ func FetchFromGCS(ctx context.Context, client ObjectStorage, gsURL string) ([]by return content, nil } +// SendBytesToGCS uploads the given bytes (uncompressed) to gsURL. Intended for +// small objects such as the snapshot manifest. +func SendBytesToGCS(ctx context.Context, client ObjectStorage, gsURL string, content []byte) error { + ctx, span := tracer.Start(ctx, "sendBytesToGCS") + defer span.End() + + bucket, object, err := parseGCSURL(gsURL) + if err != nil { + return fmt.Errorf("while parsing URL: %w", err) + } + if err := client.PutObject(ctx, bucket, object, bytes.NewReader(content)); err != nil { + return fmt.Errorf("while putting object bucket=%q object=%q: %w", bucket, object, err) + } + return nil +} + func SendLocalFileToGCSWithZstd(ctx context.Context, client ObjectStorage, gsURL string, localFilePath string) (err error) { ctx, span := tracer.Start(ctx, "sendLocalFileToGCSWithZstd") defer span.End() diff --git a/cmd/atelet/main.go b/cmd/atelet/main.go index 7b3316038..7094f7315 100644 --- a/cmd/atelet/main.go +++ b/cmd/atelet/main.go @@ -15,10 +15,8 @@ package main import ( - "bytes" "context" - "crypto/sha256" - "encoding/hex" + "encoding/json" "errors" "fmt" "io" @@ -26,7 +24,6 @@ import ( "net" "os" "path/filepath" - "runtime" "strconv" "strings" @@ -211,79 +208,6 @@ func NewService( return wms } -func (s *AteomHerder) 
fetchRunsc(ctx context.Context, cfg *ateletpb.RunscConfig) (string, error) { - var platCfg *ateletpb.RunscPlatformConfig - switch runtime.GOARCH { - case "amd64": - platCfg = cfg.GetAmd64() - case "arm64": - platCfg = cfg.GetArm64() - } - - sha256Hash := platCfg.GetSha256Hash() - if err := resources.ValidateRunscHash(sha256Hash); err != nil { - return "", status.Error(codes.InvalidArgument, err.Error()) - } - - localPath := ateompath.RunSCBinaryPath(sha256Hash) - _, err := os.Stat(localPath) - if err == nil { // EQUALS nil - return localPath, nil - } else if !errors.Is(err, os.ErrNotExist) { - return "", fmt.Errorf("while stat-ing local file: %w", err) - } - - // Fetch the file. - - client := s.anonGCSClient - if cfg.GetAuthentication().GetGcp().GetUse() { - client = s.gcsClient - } - - content, err := ategcs.FetchFromGCS(ctx, client, platCfg.GetUrl()) - if err != nil { - return "", fmt.Errorf("while fetching %v: %w", platCfg.GetUrl(), err) - } - - // Check hash - sum := sha256.Sum256(content) - wantSum, err := hex.DecodeString(platCfg.GetSha256Hash()) - if err != nil { - return "", fmt.Errorf("while parsing sha256 hash: %w", err) - } - if !bytes.Equal(sum[:], wantSum) { - return "", fmt.Errorf("sha256 mismatch; got=%s want=%s", hex.EncodeToString(sum[:]), platCfg.GetSha256Hash()) - } - - tmpFileName, err := func() (string, error) { - localDir := filepath.Dir(localPath) - tmpFile, err := os.CreateTemp(localDir, filepath.Base(localPath)+"-download-") - if err != nil { - return "", fmt.Errorf("while temp file: %w", err) - } - defer tmpFile.Close() - - if _, err := tmpFile.Write(content); err != nil { - return "", fmt.Errorf("while writing content to temp file: %w", err) - } - - if err := tmpFile.Chmod(0o755); err != nil { - return "", fmt.Errorf("while setting file mode: %w", err) - } - - return tmpFile.Name(), nil - }() - if err != nil { - return "", fmt.Errorf("while populating temp file: %w", err) - } - - if err := os.Rename(tmpFileName, localPath); err != nil { - 
return "", fmt.Errorf("while renaming temp file to target: %w", err) - } - - return localPath, nil -} - func (s *AteomHerder) Run(ctx context.Context, req *ateletpb.RunRequest) (*ateletpb.RunResponse, error) { if err := validateRunRequest(req); err != nil { // status.Error so the interceptor surfaces InvalidArgument and the @@ -291,17 +215,29 @@ func (s *AteomHerder) Run(ctx context.Context, req *ateletpb.RunRequest) (*atele return nil, status.Error(codes.InvalidArgument, err.Error()) } - runscPath, err := s.fetchRunscAndPrep(ctx, req.GetRunsc()) + ns, tmpl, actorID := req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId() + + sandboxRec, err := recordFromRequest(req.GetSandboxAssets()) + if err != nil { + return nil, status.Error(codes.InvalidArgument, err.Error()) + } + runscPath, err := s.ensureSandboxBinary(ctx, sandboxRec) if err != nil { return nil, err } - if err := resetActorDirs(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId()); err != nil { + if err := resetActorDirs(ns, tmpl, actorID); err != nil { return nil, fmt.Errorf("while resetting actor dirs: %w", err) } - if err := s.prepareOCIBundles(ctx, - req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId(), + // Record the sandbox binaries this actor is running so a later Checkpoint + // (whose request no longer carries the sandbox config) can re-fetch the same + // version and pin it into the snapshot manifest. + if err := writeSandboxRecord(ns, tmpl, actorID, sandboxRec); err != nil { + return nil, fmt.Errorf("while recording sandbox assets: %w", err) + } + + if err := s.prepareOCIBundles(ctx, ns, tmpl, actorID, req.GetSpec(), req.GetTargetAteomUid(), ); err != nil { return nil, err @@ -315,9 +251,9 @@ func (s *AteomHerder) Run(ctx context.Context, req *ateletpb.RunRequest) (*atele // Tell ateom to do runsc create + runsc start for pause container and // all application containers. 
if _, err := client.RunWorkload(ctx, &ateompb.RunWorkloadRequest{ - ActorTemplateNamespace: req.GetActorTemplateNamespace(), - ActorTemplateName: req.GetActorTemplateName(), - ActorId: req.GetActorId(), + ActorTemplateNamespace: ns, + ActorTemplateName: tmpl, + ActorId: actorID, RunscPath: runscPath, Spec: buildAteomWorkloadSpec(req.GetSpec()), }); err != nil { @@ -368,12 +304,22 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe return nil, status.Error(codes.InvalidArgument, err.Error()) } - runscPath, err := s.fetchRunscAndPrep(ctx, req.GetRunsc()) + ns, tmpl, actorID := req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId() + + // Checkpoint requests no longer carry the sandbox config; recover the + // version this actor was started with from the on-node record and re-fetch + // it (a cache hit) so ateom can drive runsc, and so we can pin it into the + // snapshot manifest below. + sandboxRec, err := readSandboxRecord(ns, tmpl, actorID) + if err != nil { + return nil, fmt.Errorf("while loading recorded sandbox assets: %w", err) + } + runscPath, err := s.ensureSandboxBinary(ctx, sandboxRec) if err != nil { return nil, err } - checkpointDir := ateompath.CheckpointStateDir(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId()) + checkpointDir := ateompath.CheckpointStateDir(ns, tmpl, actorID) client, err := s.dialAteom(ctx, req.GetTargetAteomUid()) if err != nil { @@ -382,24 +328,22 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe // Tell ateom to take checkpoint and delete containers. 
if _, err := client.CheckpointWorkload(ctx, &ateompb.CheckpointWorkloadRequest{ - ActorTemplateNamespace: req.GetActorTemplateNamespace(), - ActorTemplateName: req.GetActorTemplateName(), - ActorId: req.GetActorId(), + ActorTemplateNamespace: ns, + ActorTemplateName: tmpl, + ActorId: actorID, RunscPath: runscPath, Spec: buildAteomWorkloadSpec(req.GetSpec()), }); err != nil { return nil, fmt.Errorf("while calling ateom.CheckpointWorkload: %w", err) } - ns, tmpl, actorID := req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId() - switch req.GetType() { case ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL: - if err := s.uploadExternalCheckpoint(ctx, req, checkpointDir); err != nil { + if err := s.uploadExternalCheckpoint(ctx, req, checkpointDir, sandboxRec); err != nil { return nil, err } case ateletpb.CheckpointType_CHECKPOINT_TYPE_LOCAL: - if err := s.moveLocalCheckpoint(ctx, req, checkpointDir); err != nil { + if err := s.moveLocalCheckpoint(ctx, req, checkpointDir, sandboxRec); err != nil { return nil, err } default: @@ -413,7 +357,7 @@ func (s *AteomHerder) Checkpoint(ctx context.Context, req *ateletpb.CheckpointRe return &ateletpb.CheckpointResponse{}, nil } -func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string) error { +func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord) error { localCheckpointPath := filepath.Join(ateompath.LocalCheckpointsDir(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId()), req.GetLocalConfig().GetSnapshotPrefix()) if err := os.MkdirAll(localCheckpointPath, 0o700); err != nil { return fmt.Errorf("while creating local checkpoint directory: %w", err) @@ -431,10 +375,20 @@ func (s *AteomHerder) moveLocalCheckpoint(ctx context.Context, req *ateletpb.Che } } + // Pin the sandbox binaries into a manifest beside the images so a later + // 
Restore is self-describing. + manifest, err := json.Marshal(rec) + if err != nil { + return fmt.Errorf("while marshaling snapshot manifest: %w", err) + } + if err := os.WriteFile(filepath.Join(localCheckpointPath, sandboxManifestName), manifest, 0o600); err != nil { + return fmt.Errorf("while writing snapshot manifest: %w", err) + } + return nil } -func (s *AteomHerder) uploadExternalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string) error { +func (s *AteomHerder) uploadExternalCheckpoint(ctx context.Context, req *ateletpb.CheckpointRequest, checkpointDir string, rec *sandboxAssetsRecord) error { ns, tmpl := req.GetActorTemplateNamespace(), req.GetActorTemplateName() prefix := strings.TrimSuffix(req.GetExternalConfig().GetSnapshotUriPrefix(), "/") @@ -466,6 +420,16 @@ func (s *AteomHerder) uploadExternalCheckpoint(ctx context.Context, req *ateletp ); err != nil { return err } + + // Pin the sandbox binaries into a manifest beside the images so a Restore on + // any node is self-describing. 
+ manifest, err := json.Marshal(rec) + if err != nil { + return fmt.Errorf("while marshaling snapshot manifest: %w", err) + } + if err := ategcs.SendBytesToGCS(ctx, s.gcsClient, prefix+"/"+sandboxManifestName, manifest); err != nil { + return fmt.Errorf("while uploading snapshot manifest: %w", err) + } return nil } @@ -474,33 +438,56 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) return nil, status.Error(codes.InvalidArgument, err.Error()) } - runscPath, err := s.fetchRunscAndPrep(ctx, req.GetRunsc()) - if err != nil { - return nil, err - } - ns, tmpl, actorID := req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId() if err := resetActorDirs(ns, tmpl, actorID); err != nil { return nil, fmt.Errorf("while resetting actor dirs: %w", err) } - checkpointDir := ateompath.RestoreStateDir(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId()) + checkpointDir := ateompath.RestoreStateDir(ns, tmpl, actorID) + + // The snapshot is self-describing: recover the sandbox binaries that created + // it from the manifest stored beside the checkpoint images (the Restore + // request no longer carries the sandbox config). 
+ var sandboxRec *sandboxAssetsRecord switch req.GetType() { case ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL: - if err := s.downloadExternalCheckpoint(ctx, req.GetExternalConfig().GetSnapshotUriPrefix(), checkpointDir); err != nil { + prefix := req.GetExternalConfig().GetSnapshotUriPrefix() + manifest, err := ategcs.FetchFromGCS(ctx, s.gcsClient, strings.TrimSuffix(prefix, "/")+"/"+sandboxManifestName) + if err != nil { + return nil, fmt.Errorf("while fetching snapshot manifest: %w", err) + } + sandboxRec, err = unmarshalSandboxRecord(manifest) + if err != nil { + return nil, err + } + if err := s.downloadExternalCheckpoint(ctx, prefix, checkpointDir); err != nil { return nil, err } case ateletpb.CheckpointType_CHECKPOINT_TYPE_LOCAL: // TODO(dberkov): the old pause checkpoint files are not deleted after they are copied to checkpointDir. This needs to be fixed in following PR. - localCheckpointDir := ateompath.LocalCheckpointsDir(req.GetActorTemplateNamespace(), req.GetActorTemplateName(), req.GetActorId()) - if err := s.copyLocalCheckpoint(ctx, req.GetLocalConfig().GetSnapshotPrefix(), localCheckpointDir, checkpointDir); err != nil { + localCheckpointDir := ateompath.LocalCheckpointsDir(ns, tmpl, actorID) + snapshotPrefix := req.GetLocalConfig().GetSnapshotPrefix() + manifest, err := os.ReadFile(filepath.Join(localCheckpointDir, snapshotPrefix, sandboxManifestName)) + if err != nil { + return nil, fmt.Errorf("while reading local snapshot manifest: %w", err) + } + sandboxRec, err = unmarshalSandboxRecord(manifest) + if err != nil { + return nil, err + } + if err := s.copyLocalCheckpoint(ctx, snapshotPrefix, localCheckpointDir, checkpointDir); err != nil { return nil, err } default: return nil, fmt.Errorf("unexpected checkpoint type: %v", req.GetType()) } + runscPath, err := s.ensureSandboxBinary(ctx, sandboxRec) + if err != nil { + return nil, err + } + if err := s.prepareOCIBundles(ctx, ns, tmpl, actorID, req.GetSpec(), req.GetTargetAteomUid(), ); err != nil 
{ @@ -524,6 +511,12 @@ func (s *AteomHerder) Restore(ctx context.Context, req *ateletpb.RestoreRequest) return nil, fmt.Errorf("while calling ateom.RestoreWorkload: %w", err) } + // Record the (manifest-pinned) sandbox binaries on-node so a subsequent + // Checkpoint of this restored actor can re-pin the same version. + if err := writeSandboxRecord(ns, tmpl, actorID, sandboxRec); err != nil { + return nil, fmt.Errorf("while recording sandbox assets: %w", err) + } + return &ateletpb.RestoreResponse{}, nil } @@ -594,20 +587,6 @@ func (s *AteomHerder) downloadExternalCheckpoint(ctx context.Context, snapshotUr return nil } -// fetchRunscAndPrep ensures the static files dir exists and downloads the -// runsc binary at the version pinned by the request. Returns the local -// runsc path. -func (s *AteomHerder) fetchRunscAndPrep(ctx context.Context, runscCfg *ateletpb.RunscConfig) (string, error) { - if err := os.MkdirAll(ateompath.StaticFilesDir, 0o700); err != nil { - return "", fmt.Errorf("while creating static files dir: %w", err) - } - runscPath, err := s.fetchRunsc(ctx, runscCfg) - if err != nil { - return "", fmt.Errorf("in fetchRunsc: %w", err) - } - return runscPath, nil -} - // prepareOCIBundles pulls images and assembles OCI bundles for the pause // container and every application container in spec, in parallel. func (s *AteomHerder) prepareOCIBundles( diff --git a/cmd/atelet/main_test.go b/cmd/atelet/main_test.go index ddf86370b..e76953636 100644 --- a/cmd/atelet/main_test.go +++ b/cmd/atelet/main_test.go @@ -234,15 +234,14 @@ func TestValidateRestoreRequest(t *testing.T) { } } -// TestFetchRunscRejectsBadHash confirms fetchRunsc validates the runsc hash +// TestFetchAssetRejectsBadHash confirms fetchAsset validates the asset hash // before the cache-hit os.Stat/early-return, not merely "at some point". 
To // prove the ordering, it plants a real file at the exact path an invalid hash -// resolves to: a correctly-ordered fetchRunsc validates first and returns an +// resolves to: a correctly-ordered fetchAsset validates first and returns an // error, while a regression that stats first would find this file and return it // with a nil error, failing the test. StaticFilesDir is redirected to a temp -// dir so the planted path is writable and isolated. Both arch fields are set so -// the test is independent of the host GOARCH. -func TestFetchRunscRejectsBadHash(t *testing.T) { +// dir so the planted path is writable and isolated. +func TestFetchAssetRejectsBadHash(t *testing.T) { orig := ateompath.StaticFilesDir ateompath.StaticFilesDir = t.TempDir() t.Cleanup(func() { ateompath.StaticFilesDir = orig }) @@ -255,11 +254,8 @@ func TestFetchRunscRejectsBadHash(t *testing.T) { } s := &AteomHerder{} - bad := &ateletpb.RunscPlatformConfig{Sha256Hash: badHash} - cfg := &ateletpb.RunscConfig{Amd64: bad, Arm64: bad} - - if _, err := s.fetchRunsc(context.Background(), cfg); err == nil { - t.Error("fetchRunsc returned a cache hit for an invalid hash; validation must run before the os.Stat early return") + if _, err := s.fetchAsset(context.Background(), assetEntry{SHA256: badHash}); err == nil { + t.Error("fetchAsset returned a cache hit for an invalid hash; validation must run before the os.Stat early return") } } diff --git a/cmd/atelet/sandbox_assets.go b/cmd/atelet/sandbox_assets.go new file mode 100644 index 000000000..9b06d2a7d --- /dev/null +++ b/cmd/atelet/sandbox_assets.go @@ -0,0 +1,191 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "runtime" + + "github.com/agent-substrate/substrate/cmd/atelet/internal/ategcs" + "github.com/agent-substrate/substrate/internal/ateompath" + "github.com/agent-substrate/substrate/internal/proto/ateletpb" + "github.com/agent-substrate/substrate/internal/resources" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// sandboxManifestName is the object/file name of the per-snapshot manifest that +// records which sandbox binaries created a snapshot. It is written next to the +// checkpoint images (in the external object store, or the local checkpoint dir) +// so a Restore — possibly on another node — is self-describing. +const sandboxManifestName = "manifest.json" + +// assetEntry is one content-addressed sandbox asset (url + sha256). +type assetEntry struct { + URL string `json:"url"` + SHA256 string `json:"sha256"` +} + +// sandboxAssetsRecord is the sandbox runtime an actor is running, projected onto +// the local node's architecture: the sandbox class plus the asset set keyed by +// asset name (gVisor uses a single "runsc" asset). It is both the per-actor +// on-node record (written at Run/Restore, read at Checkpoint) and the snapshot +// manifest (written at Checkpoint, read at Restore). 
+type sandboxAssetsRecord struct {
+	SandboxClass string                `json:"sandboxClass"`
+	Assets       map[string]assetEntry `json:"assets"`
+}
+
+// recordFromRequest projects a request's per-architecture SandboxAssets onto the
+// local node's architecture (runtime.GOARCH).
+//
+// It returns an error when sa is nil or when sa carries no files for the local
+// architecture. On success every file listed for that architecture is copied
+// into the returned record, keyed by asset name.
+func recordFromRequest(sa *ateletpb.SandboxAssets) (*sandboxAssetsRecord, error) {
+	if sa == nil {
+		return nil, fmt.Errorf("missing sandbox_assets")
+	}
+	arch := runtime.GOARCH
+	archAssets := sa.GetAssets()[arch]
+	if archAssets == nil || len(archAssets.GetFiles()) == 0 {
+		return nil, fmt.Errorf("sandbox_assets has no assets for architecture %q", arch)
+	}
+	rec := &sandboxAssetsRecord{
+		SandboxClass: sa.GetSandboxClass(),
+		Assets:       make(map[string]assetEntry, len(archAssets.GetFiles())),
+	}
+	for name, f := range archAssets.GetFiles() {
+		rec.Assets[name] = assetEntry{URL: f.GetUrl(), SHA256: f.GetSha256()}
+	}
+	return rec, nil
+}
+
+// ensureSandboxBinary fetches the sandbox binary an actor needs and returns its
+// local path. For gVisor this is the single "runsc" asset, passed to ateom as
+// RunscPath. Binaries are content-addressed and cached, so re-fetching at
+// Checkpoint/Restore is a no-op once present.
+//
+// It returns an InvalidArgument status error when rec is nil or has no "runsc"
+// entry, and a wrapped error when the static files dir cannot be created or the
+// asset cannot be fetched.
+func (s *AteomHerder) ensureSandboxBinary(ctx context.Context, rec *sandboxAssetsRecord) (string, error) {
+	// A nil record would panic on the field accesses below; report it as a
+	// caller error instead.
+	if rec == nil {
+		return "", status.Error(codes.InvalidArgument, "missing sandbox assets record")
+	}
+	if err := os.MkdirAll(ateompath.StaticFilesDir, 0o700); err != nil {
+		return "", fmt.Errorf("while creating static files dir: %w", err)
+	}
+	// gVisor uses a single "runsc" asset. Indexing a nil Assets map is safe and
+	// simply reports the asset as missing.
+	entry, ok := rec.Assets["runsc"]
+	if !ok {
+		return "", status.Errorf(codes.InvalidArgument, "sandbox assets for class %q missing required %q file", rec.SandboxClass, "runsc")
+	}
+	return s.fetchAsset(ctx, entry)
+}
+
+// fetchAsset downloads one content-addressed asset (verifying its sha256) into
+// the shared static-files cache and returns its local path. On a cache hit it
+// returns immediately.
+func (s *AteomHerder) fetchAsset(ctx context.Context, entry assetEntry) (string, error) {
+	// Validate first: the hash is used to build the cache path, so an invalid
+	// value must be rejected before the os.Stat cache check below.
+	if err := resources.ValidateRunscHash(entry.SHA256); err != nil {
+		return "", status.Error(codes.InvalidArgument, err.Error())
+	}
+
+	localPath := ateompath.RunSCBinaryPath(entry.SHA256)
+	if _, err := os.Stat(localPath); err == nil {
+		// Cache hit: the file is trusted by name and not re-hashed, which is
+		// why only fully written, verified content may ever be renamed here.
+		return localPath, nil
+	} else if !errors.Is(err, os.ErrNotExist) {
+		return "", fmt.Errorf("while stat-ing local file: %w", err)
+	}
+
+	// gVisor's runsc lives in the public gs://gvisor bucket, so the anonymous
+	// client suffices. TODO: drive authenticated asset fetches from atelet
+	// configuration for assets in private buckets.
+	content, err := ategcs.FetchFromGCS(ctx, s.anonGCSClient, entry.URL)
+	if err != nil {
+		return "", fmt.Errorf("while fetching %v: %w", entry.URL, err)
+	}
+
+	// Verify the downloaded bytes against the pinned hash before anything is
+	// written to disk.
+	sum := sha256.Sum256(content)
+	wantSum, err := hex.DecodeString(entry.SHA256)
+	if err != nil {
+		return "", fmt.Errorf("while parsing sha256 hash: %w", err)
+	}
+	if !bytes.Equal(sum[:], wantSum) {
+		return "", fmt.Errorf("sha256 mismatch; got=%s want=%s", hex.EncodeToString(sum[:]), entry.SHA256)
+	}
+
+	// Stage the content in a temp file in the destination directory so the
+	// final rename stays on one filesystem.
+	tmpFile, err := os.CreateTemp(filepath.Dir(localPath), filepath.Base(localPath)+"-download-")
+	if err != nil {
+		return "", fmt.Errorf("while creating temp file: %w", err)
+	}
+	tmpFileName := tmpFile.Name()
+
+	// Until the rename succeeds the temp file is garbage; remove it on every
+	// failure path so failed downloads do not accumulate in the cache dir.
+	published := false
+	defer func() {
+		if !published {
+			_ = os.Remove(tmpFileName) // best effort; the primary error is already being returned
+		}
+	}()
+
+	populateErr := func() error {
+		if _, err := tmpFile.Write(content); err != nil {
+			return fmt.Errorf("while writing content to temp file: %w", err)
+		}
+		if err := tmpFile.Chmod(0o755); err != nil {
+			return fmt.Errorf("while setting file mode: %w", err)
+		}
+		return nil
+	}()
+	// Close exactly once and check the result: a failed Close can mean the
+	// data never fully reached the file, and such a file must not be published
+	// under a content-addressed name.
+	if closeErr := tmpFile.Close(); populateErr == nil && closeErr != nil {
+		populateErr = fmt.Errorf("while closing temp file: %w", closeErr)
+	}
+	if populateErr != nil {
+		return "", fmt.Errorf("while populating temp file: %w", populateErr)
+	}
+
+	if err := os.Rename(tmpFileName, localPath); err != nil {
+		return "", fmt.Errorf("while renaming temp file to target: %w", err)
+	}
+	published = true
+
+	return localPath, nil
+}
+
+// writeSandboxRecord persists the actor's running 
sandbox assets on-node so a +// later Checkpoint (whose request no longer carries the sandbox config) can +// re-fetch the same binaries and pin them into the snapshot manifest. +func writeSandboxRecord(actorTemplateNamespace, actorTemplateName, actorID string, rec *sandboxAssetsRecord) error { + data, err := json.Marshal(rec) + if err != nil { + return fmt.Errorf("while marshaling sandbox record: %w", err) + } + path := ateompath.ActorSandboxAssetsFile(actorTemplateNamespace, actorTemplateName, actorID) + if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil { + return fmt.Errorf("while creating actor dir: %w", err) + } + if err := writeFileAtomic(path, data, 0o600); err != nil { + return fmt.Errorf("while writing sandbox record: %w", err) + } + return nil +} + +// readSandboxRecord loads the actor's on-node sandbox record written at +// Run/Restore. +func readSandboxRecord(actorTemplateNamespace, actorTemplateName, actorID string) (*sandboxAssetsRecord, error) { + path := ateompath.ActorSandboxAssetsFile(actorTemplateNamespace, actorTemplateName, actorID) + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("while reading sandbox record %s: %w", path, err) + } + return unmarshalSandboxRecord(data) +} + +func unmarshalSandboxRecord(data []byte) (*sandboxAssetsRecord, error) { + rec := &sandboxAssetsRecord{} + if err := json.Unmarshal(data, rec); err != nil { + return nil, fmt.Errorf("while parsing sandbox record/manifest: %w", err) + } + return rec, nil +} diff --git a/internal/ateompath/ateompath.go b/internal/ateompath/ateompath.go index 9e846dc7f..a779755c4 100644 --- a/internal/ateompath/ateompath.go +++ b/internal/ateompath/ateompath.go @@ -80,6 +80,19 @@ func ActorIdentityDirPath(actorTemplateNamespace, actorTemplateName, actorID str ) } +// ActorSandboxAssetsFile is the per-actor file where atelet records the sandbox +// binaries (class + content-addressed asset set, for this node's architecture) +// the actor is currently 
running. It is written at Run/Restore and read at +// Checkpoint (when the request no longer carries the sandbox config). It lives +// directly under ActorPath — NOT under a subdir wiped by atelet's +// resetActorDirs — so it survives between Run and a later Checkpoint. +func ActorSandboxAssetsFile(actorTemplateNamespace, actorTemplateName, actorID string) string { + return filepath.Join( + ActorPath(actorTemplateNamespace, actorTemplateName, actorID), + "sandbox-assets.json", + ) +} + func RunSCStateDir(actorTemplateNamespace, actorTemplateName, actorID string) string { return filepath.Join( ActorPath(actorTemplateNamespace, actorTemplateName, actorID), From 4247c11a499e39c6529ecfe5804463342870f81c Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Tue, 16 Jun 2026 17:09:47 -0700 Subject: [PATCH 4/6] feat(ateapi): resolve sandbox binaries from WorkerPool SandboxConfig On boot-from-spec (Run), resolve the actor's WorkerPool, take its SandboxClass, and pick the named or cluster-default SandboxConfig, sending the result as RunRequest.SandboxAssets. Checkpoint/Restore no longer send sandbox config (atelet uses the on-node record / snapshot manifest). - New resolveSandboxAssets helper; thread WorkerPool + SandboxConfig listers through NewService/NewActorWorkflow into the resume Run step. - Drop the RunscConfig plumbing from resume/pause/suspend. - Grant ate-api-server get/watch/list on workerpools + sandboxconfigs. - functional_test: create a WorkerPool + default gvisor SandboxConfig. 
--- .../internal/controlapi/functional_test.go | 78 +++++++++++-- .../internal/controlapi/sandbox_assets.go | 104 ++++++++++++++++++ cmd/ateapi/internal/controlapi/service.go | 11 +- cmd/ateapi/internal/controlapi/workflow.go | 15 ++- .../internal/controlapi/workflow_pause.go | 23 +--- .../internal/controlapi/workflow_resume.go | 40 +++---- .../internal/controlapi/workflow_suspend.go | 23 +--- cmd/ateapi/main.go | 4 +- manifests/ate-install/ate-api-server.yaml | 2 +- 9 files changed, 222 insertions(+), 78 deletions(-) create mode 100644 cmd/ateapi/internal/controlapi/sandbox_assets.go diff --git a/cmd/ateapi/internal/controlapi/functional_test.go b/cmd/ateapi/internal/controlapi/functional_test.go index e49ece3f4..e45025e49 100644 --- a/cmd/ateapi/internal/controlapi/functional_test.go +++ b/cmd/ateapi/internal/controlapi/functional_test.go @@ -231,6 +231,8 @@ type testContext struct { fakeAtelet *FakeAteletServer cleanup func() actorTemplateLister listersv1alpha1.ActorTemplateLister + workerPoolLister listersv1alpha1.WorkerPoolLister + sandboxConfigLister listersv1alpha1.SandboxConfigLister } // setupTest sets up a fully isolated test environment. @@ -266,6 +268,8 @@ func setupTest(t *testing.T, ns string) *testContext { substrateInformerFactory := externalversions.NewSharedInformerFactory(substrateClient, 0) actorTemplateLister := substrateInformerFactory.Api().V1alpha1().ActorTemplates().Lister() + workerPoolLister := substrateInformerFactory.Api().V1alpha1().WorkerPools().Lister() + sandboxConfigLister := substrateInformerFactory.Api().V1alpha1().SandboxConfigs().Lister() ctx, cancel := context.WithCancel(context.Background()) @@ -282,7 +286,7 @@ func setupTest(t *testing.T, ns string) *testContext { // 4. 
Initialize Service dialer := NewAteletDialer(workerInformer.GetIndexer(), ateletInformer.GetIndexer()) - service := NewService(persistence, actorTemplateLister, dialer, k8sClient) + service := NewService(persistence, actorTemplateLister, workerPoolLister, sandboxConfigLister, dialer, k8sClient) // 5. Start REAL gRPC Server for ATE API grpcServer := grpc.NewServer(grpc.UnaryInterceptor(ateinterceptors.ServerUnaryInterceptor)) @@ -343,6 +347,8 @@ func setupTest(t *testing.T, ns string) *testContext { fakeAtelet: fakeAtelet, cleanup: cleanup, actorTemplateLister: actorTemplateLister, + workerPoolLister: workerPoolLister, + sandboxConfigLister: sandboxConfigLister, } } @@ -363,18 +369,20 @@ func createTemplate(t *testing.T, tc *testContext, ns string) { func createTemplateWithContainers(t *testing.T, tc *testContext, ns string, containers []atev1alpha1.Container) { t.Helper() + + // Sandbox binaries now live on a (cluster-scoped) SandboxConfig resolved via + // the actor's WorkerPool, not on the ActorTemplate. Create a default gvisor + // SandboxConfig and the pool the template references so a boot-from-spec Run + // can resolve its assets. + ensureDefaultGvisorSandboxConfig(t, tc) + ensureWorkerPool(t, tc, ns, "pool1") + actorTemplate := &atev1alpha1.ActorTemplate{ ObjectMeta: metav1.ObjectMeta{ Name: "tmpl1", Namespace: ns, }, Spec: atev1alpha1.ActorTemplateSpec{ - Runsc: atev1alpha1.RunscConfig{ - AMD64: &atev1alpha1.RunscPlatformConfig{ - URL: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc", - SHA256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63", - }, - }, PauseImage: "pause@sha256:abc", SnapshotsConfig: atev1alpha1.SnapshotsConfig{ Location: "gs://fake-fake-fake", @@ -413,6 +421,62 @@ func createTemplateWithContainers(t *testing.T, tc *testContext, ns string, cont } } +// ensureDefaultGvisorSandboxConfig creates the cluster-scoped default gvisor +// SandboxConfig (idempotently) and waits for it to appear in the lister. 
+func ensureDefaultGvisorSandboxConfig(t *testing.T, tc *testContext) {
+	t.Helper()
+	const name = "gvisor-default"
+
+	// One "runsc" asset per architecture, keyed arch -> asset name.
+	runscByArch := map[string]map[string]atev1alpha1.AssetFile{
+		"amd64": {"runsc": {
+			URL:    "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc",
+			SHA256: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63",
+		}},
+		"arm64": {"runsc": {
+			URL:    "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc",
+			SHA256: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9",
+		}},
+	}
+	cfg := &atev1alpha1.SandboxConfig{
+		ObjectMeta: metav1.ObjectMeta{Name: name},
+		Spec: atev1alpha1.SandboxConfigSpec{
+			SandboxClass: atev1alpha1.SandboxClassGvisor,
+			Default:      true,
+			Assets:       runscByArch,
+		},
+	}
+
+	// Create is idempotent for callers: an AlreadyExists error is tolerated.
+	_, createErr := tc.substrateClient.ApiV1alpha1().SandboxConfigs().Create(context.Background(), cfg, metav1.CreateOptions{})
+	if createErr != nil && !apierrors.IsAlreadyExists(createErr) {
+		t.Fatalf("failed to create default SandboxConfig: %v", createErr)
+	}
+
+	// Poll the lister until the object is visible; lookup errors just mean
+	// "not yet" and never abort the poll early.
+	inLister := func(context.Context) (bool, error) {
+		_, getErr := tc.sandboxConfigLister.Get(name)
+		return getErr == nil, nil
+	}
+	if err := wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, inLister); err != nil {
+		t.Fatalf("default SandboxConfig not synced into lister: %v", err)
+	}
+}
+
+// ensureWorkerPool creates the namespaced WorkerPool an ActorTemplate references
+// (idempotently) and waits for it to appear in the lister.
+func ensureWorkerPool(t *testing.T, tc *testContext, ns, name string) { + t.Helper() + wp := &atev1alpha1.WorkerPool{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Spec: atev1alpha1.WorkerPoolSpec{ + Replicas: 1, + AteomImage: "ateom@sha256:abc", + SandboxClass: atev1alpha1.SandboxClassGvisor, + }, + } + if _, err := tc.substrateClient.ApiV1alpha1().WorkerPools(ns).Create(context.Background(), wp, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) { + t.Fatalf("failed to create WorkerPool: %v", err) + } + if err := wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) { + _, err := tc.workerPoolLister.WorkerPools(ns).Get(name) + return err == nil, nil + }); err != nil { + t.Fatalf("WorkerPool not synced into lister: %v", err) + } +} + func createWorkerPod(t *testing.T, tc *testContext, ns string, name string, nodeName string) { t.Helper() pod := &corev1.Pod{ diff --git a/cmd/ateapi/internal/controlapi/sandbox_assets.go b/cmd/ateapi/internal/controlapi/sandbox_assets.go new file mode 100644 index 000000000..db76fb74e --- /dev/null +++ b/cmd/ateapi/internal/controlapi/sandbox_assets.go @@ -0,0 +1,104 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package controlapi + +import ( + "fmt" + + "github.com/agent-substrate/substrate/internal/proto/ateletpb" + atev1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" + listersv1alpha1 "github.com/agent-substrate/substrate/pkg/client/listers/api/v1alpha1" + "k8s.io/apimachinery/pkg/labels" +) + +// resolveSandboxAssets determines the sandbox binaries an actor should boot with +// and projects them onto the ateletpb.SandboxAssets atelet fetches. It resolves +// the actor's WorkerPool, takes its SandboxClass (default gvisor), then picks the +// SandboxConfig named by the pool — or, if none is named, the cluster default +// SandboxConfig for that class. +func resolveSandboxAssets( + workerPoolLister listersv1alpha1.WorkerPoolLister, + sandboxConfigLister listersv1alpha1.SandboxConfigLister, + at *atev1alpha1.ActorTemplate, +) (*ateletpb.SandboxAssets, error) { + ref := at.Spec.WorkerPoolRef + wp, err := workerPoolLister.WorkerPools(ref.Namespace).Get(ref.Name) + if err != nil { + return nil, fmt.Errorf("while getting WorkerPool %s/%s: %w", ref.Namespace, ref.Name, err) + } + + class := wp.Spec.SandboxClass + if class == "" { + class = atev1alpha1.SandboxClassGvisor + } + + var sc *atev1alpha1.SandboxConfig + if name := wp.Spec.SandboxConfigName; name != "" { + sc, err = sandboxConfigLister.Get(name) + if err != nil { + return nil, fmt.Errorf("while getting SandboxConfig %q: %w", name, err) + } + if sc.Spec.SandboxClass != class { + return nil, fmt.Errorf("SandboxConfig %q has class %q but WorkerPool %s/%s is class %q", + name, sc.Spec.SandboxClass, ref.Namespace, ref.Name, class) + } + } else { + sc, err = defaultSandboxConfig(sandboxConfigLister, class) + if err != nil { + return nil, err + } + } + + return sandboxAssetsProto(class, sc), nil +} + +// defaultSandboxConfig returns the single SandboxConfig marked Default for the +// given class, erroring if there are zero or more than one. 
+func defaultSandboxConfig(lister listersv1alpha1.SandboxConfigLister, class atev1alpha1.SandboxClass) (*atev1alpha1.SandboxConfig, error) { + all, err := lister.List(labels.Everything()) + if err != nil { + return nil, fmt.Errorf("while listing SandboxConfigs: %w", err) + } + var match *atev1alpha1.SandboxConfig + for _, sc := range all { + if sc.Spec.SandboxClass == class && sc.Spec.Default { + if match != nil { + return nil, fmt.Errorf("multiple default SandboxConfigs for class %q (%q and %q)", class, match.Name, sc.Name) + } + match = sc + } + } + if match == nil { + return nil, fmt.Errorf("no default SandboxConfig for class %q; set one with spec.default=true or name one via WorkerPool.spec.sandboxConfigName", class) + } + return match, nil +} + +// sandboxAssetsProto converts a resolved SandboxConfig into the proto atelet +// consumes. +func sandboxAssetsProto(class atev1alpha1.SandboxClass, sc *atev1alpha1.SandboxConfig) *ateletpb.SandboxAssets { + out := &ateletpb.SandboxAssets{ + SandboxClass: string(class), + Assets: make(map[string]*ateletpb.ArchAssets, len(sc.Spec.Assets)), + } + for arch, files := range sc.Spec.Assets { + archAssets := &ateletpb.ArchAssets{Files: make(map[string]*ateletpb.AssetFile, len(files))} + for name, f := range files { + archAssets.Files[name] = &ateletpb.AssetFile{Url: f.URL, Sha256: f.SHA256} + } + out.Assets[arch] = archAssets + } + return out +} diff --git a/cmd/ateapi/internal/controlapi/service.go b/cmd/ateapi/internal/controlapi/service.go index 39841729e..6f3421403 100644 --- a/cmd/ateapi/internal/controlapi/service.go +++ b/cmd/ateapi/internal/controlapi/service.go @@ -33,12 +33,19 @@ type Service struct { var _ ateapipb.ControlServer = (*Service)(nil) // NewService creates a service. 
-func NewService(persistence store.Interface, actorTemplateLister listersv1alpha1.ActorTemplateLister, dialer *AteletDialer, kubeClient kubernetes.Interface) *Service { +func NewService( + persistence store.Interface, + actorTemplateLister listersv1alpha1.ActorTemplateLister, + workerPoolLister listersv1alpha1.WorkerPoolLister, + sandboxConfigLister listersv1alpha1.SandboxConfigLister, + dialer *AteletDialer, + kubeClient kubernetes.Interface, +) *Service { s := &Service{ persistence: persistence, actorTemplateLister: actorTemplateLister, dialer: dialer, - actorWorkflow: NewActorWorkflow(persistence, dialer, actorTemplateLister, kubeClient), + actorWorkflow: NewActorWorkflow(persistence, dialer, actorTemplateLister, workerPoolLister, sandboxConfigLister, kubeClient), } return s diff --git a/cmd/ateapi/internal/controlapi/workflow.go b/cmd/ateapi/internal/controlapi/workflow.go index de229259f..c5849b9c3 100644 --- a/cmd/ateapi/internal/controlapi/workflow.go +++ b/cmd/ateapi/internal/controlapi/workflow.go @@ -117,16 +117,27 @@ type ActorWorkflow struct { store store.Interface dialer *AteletDialer actorTemplateLister listersv1alpha1.ActorTemplateLister + workerPoolLister listersv1alpha1.WorkerPoolLister + sandboxConfigLister listersv1alpha1.SandboxConfigLister kubeClient kubernetes.Interface secretCache *envSecretCache } // NewActorWorkflow creates a new ActorWorkflow. 
-func NewActorWorkflow(store store.Interface, dialer *AteletDialer, actorTemplateLister listersv1alpha1.ActorTemplateLister, kubeClient kubernetes.Interface) *ActorWorkflow { +func NewActorWorkflow( + store store.Interface, + dialer *AteletDialer, + actorTemplateLister listersv1alpha1.ActorTemplateLister, + workerPoolLister listersv1alpha1.WorkerPoolLister, + sandboxConfigLister listersv1alpha1.SandboxConfigLister, + kubeClient kubernetes.Interface, +) *ActorWorkflow { return &ActorWorkflow{ store: store, dialer: dialer, actorTemplateLister: actorTemplateLister, + workerPoolLister: workerPoolLister, + sandboxConfigLister: sandboxConfigLister, kubeClient: kubeClient, secretCache: newEnvSecretCache(envSecretCacheTTL), } @@ -151,7 +162,7 @@ func (w *ActorWorkflow) ResumeActor(ctx context.Context, id string, boot bool) ( steps := []WorkflowStep[*ResumeInput, *ResumeState]{ &LoadActorForResumeStep{store: w.store, actorTemplateLister: w.actorTemplateLister}, &AssignWorkerStep{store: w.store}, - &CallAteletRestoreStep{dialer: w.dialer, kubeClient: w.kubeClient, secretCache: w.secretCache}, + &CallAteletRestoreStep{dialer: w.dialer, kubeClient: w.kubeClient, secretCache: w.secretCache, workerPoolLister: w.workerPoolLister, sandboxConfigLister: w.sandboxConfigLister}, &FinalizeRunningStep{store: w.store}, } diff --git a/cmd/ateapi/internal/controlapi/workflow_pause.go b/cmd/ateapi/internal/controlapi/workflow_pause.go index 4617c465e..1880f5df8 100644 --- a/cmd/ateapi/internal/controlapi/workflow_pause.go +++ b/cmd/ateapi/internal/controlapi/workflow_pause.go @@ -114,31 +114,14 @@ func (s *CallAteletPauseStep) Execute(ctx context.Context, input *PauseInput, st } client := ateletpb.NewAteomHerderClient(ateletConn) - runscCfg := &ateletpb.RunscConfig{} - if state.ActorTemplate.Spec.Runsc.AMD64 != nil { - runscCfg.Amd64 = &ateletpb.RunscPlatformConfig{ - Sha256Hash: state.ActorTemplate.Spec.Runsc.AMD64.SHA256Hash, - Url: state.ActorTemplate.Spec.Runsc.AMD64.URL, - } - } - if 
state.ActorTemplate.Spec.Runsc.ARM64 != nil { - runscCfg.Arm64 = &ateletpb.RunscPlatformConfig{ - Sha256Hash: state.ActorTemplate.Spec.Runsc.ARM64.SHA256Hash, - Url: state.ActorTemplate.Spec.Runsc.ARM64.URL, - } - } - if state.ActorTemplate.Spec.Runsc.Authentication.GCP != nil { - authnCfg := &ateletpb.AuthenticationConfig{} - authnCfg.Gcp = &ateletpb.GCPAuthenticationConfig{Use: true} - runscCfg.Authentication = authnCfg - } - + // Checkpoint does not carry the sandbox config: atelet uses the version the + // actor is currently running (recorded on-node at Run/Restore) and pins it + // into the snapshot manifest. req := &ateletpb.CheckpointRequest{ TargetAteomUid: state.Actor.GetAteomPodUid(), ActorTemplateNamespace: state.Actor.GetActorTemplateNamespace(), ActorTemplateName: state.Actor.GetActorTemplateName(), ActorId: state.Actor.GetActorId(), - Runsc: runscCfg, Spec: &ateletpb.WorkloadSpec{ PauseImage: state.ActorTemplate.Spec.PauseImage, }, diff --git a/cmd/ateapi/internal/controlapi/workflow_resume.go b/cmd/ateapi/internal/controlapi/workflow_resume.go index 83678d7ed..f0d81dfc8 100644 --- a/cmd/ateapi/internal/controlapi/workflow_resume.go +++ b/cmd/ateapi/internal/controlapi/workflow_resume.go @@ -162,9 +162,11 @@ func (s *AssignWorkerStep) findFreeWorker(workers []*ateapipb.Worker, workerPool } type CallAteletRestoreStep struct { - dialer *AteletDialer - kubeClient kubernetes.Interface - secretCache *envSecretCache + dialer *AteletDialer + kubeClient kubernetes.Interface + secretCache *envSecretCache + workerPoolLister listersv1alpha1.WorkerPoolLister + sandboxConfigLister listersv1alpha1.SandboxConfigLister } func (s *CallAteletRestoreStep) Name() string { return "CallAteletRestore" } @@ -183,25 +185,6 @@ func (s *CallAteletRestoreStep) Execute(ctx context.Context, input *ResumeInput, return err } - runscCfg := &ateletpb.RunscConfig{} - if state.ActorTemplate.Spec.Runsc.AMD64 != nil { - runscCfg.Amd64 = &ateletpb.RunscPlatformConfig{ - Sha256Hash: 
state.ActorTemplate.Spec.Runsc.AMD64.SHA256Hash, - Url: state.ActorTemplate.Spec.Runsc.AMD64.URL, - } - } - if state.ActorTemplate.Spec.Runsc.ARM64 != nil { - runscCfg.Arm64 = &ateletpb.RunscPlatformConfig{ - Sha256Hash: state.ActorTemplate.Spec.Runsc.ARM64.SHA256Hash, - Url: state.ActorTemplate.Spec.Runsc.ARM64.URL, - } - } - if state.ActorTemplate.Spec.Runsc.Authentication.GCP != nil { - authnCfg := &ateletpb.AuthenticationConfig{} - authnCfg.Gcp = &ateletpb.GCPAuthenticationConfig{Use: true} - runscCfg.Authentication = authnCfg - } - if state.Actor.GetLatestSnapshotInfo().GetType() != ateapipb.SnapshotType_SNAPSHOT_TYPE_UNSPECIFIED { slog.InfoContext(ctx, "Actor has snapshot; Restoring from snapshot") @@ -210,7 +193,6 @@ func (s *CallAteletRestoreStep) Execute(ctx context.Context, input *ResumeInput, ActorTemplateNamespace: state.Actor.GetActorTemplateNamespace(), ActorTemplateName: state.Actor.GetActorTemplateName(), ActorId: state.Actor.GetActorId(), - Runsc: runscCfg, Spec: workloadSpec, } switch state.Actor.GetLatestSnapshotInfo().GetType() { @@ -247,7 +229,6 @@ func (s *CallAteletRestoreStep) Execute(ctx context.Context, input *ResumeInput, ActorTemplateNamespace: state.Actor.GetActorTemplateNamespace(), ActorTemplateName: state.Actor.GetActorTemplateName(), ActorId: state.Actor.GetActorId(), - Runsc: runscCfg, Spec: workloadSpec, Type: ateletpb.CheckpointType_CHECKPOINT_TYPE_EXTERNAL, Config: &ateletpb.RestoreRequest_ExternalConfig{ @@ -263,12 +244,21 @@ func (s *CallAteletRestoreStep) Execute(ctx context.Context, input *ResumeInput, return nil } else { slog.InfoContext(ctx, "Actor has no snapshot; ActorTemplate has no golden snapshot; Booting from ActorTemplate spec") + + // Booting from scratch: resolve the sandbox binaries from the pool's + // SandboxConfig and send them so atelet can fetch and record them. + // (Restores above are self-describing via the snapshot manifest.) 
+ sandboxAssets, err := resolveSandboxAssets(s.workerPoolLister, s.sandboxConfigLister, state.ActorTemplate) + if err != nil { + return fmt.Errorf("while resolving sandbox assets: %w", err) + } + req := &ateletpb.RunRequest{ TargetAteomUid: state.Actor.GetAteomPodUid(), ActorTemplateNamespace: state.Actor.GetActorTemplateNamespace(), ActorTemplateName: state.Actor.GetActorTemplateName(), ActorId: state.Actor.GetActorId(), - Runsc: runscCfg, + SandboxAssets: sandboxAssets, Spec: workloadSpec, } _, err = client.Run(ctx, req) diff --git a/cmd/ateapi/internal/controlapi/workflow_suspend.go b/cmd/ateapi/internal/controlapi/workflow_suspend.go index cd3d63870..6c33d8dee 100644 --- a/cmd/ateapi/internal/controlapi/workflow_suspend.go +++ b/cmd/ateapi/internal/controlapi/workflow_suspend.go @@ -116,31 +116,14 @@ func (s *CallAteletSuspendStep) Execute(ctx context.Context, input *SuspendInput } client := ateletpb.NewAteomHerderClient(ateletConn) - runscCfg := &ateletpb.RunscConfig{} - if state.ActorTemplate.Spec.Runsc.AMD64 != nil { - runscCfg.Amd64 = &ateletpb.RunscPlatformConfig{ - Sha256Hash: state.ActorTemplate.Spec.Runsc.AMD64.SHA256Hash, - Url: state.ActorTemplate.Spec.Runsc.AMD64.URL, - } - } - if state.ActorTemplate.Spec.Runsc.ARM64 != nil { - runscCfg.Arm64 = &ateletpb.RunscPlatformConfig{ - Sha256Hash: state.ActorTemplate.Spec.Runsc.ARM64.SHA256Hash, - Url: state.ActorTemplate.Spec.Runsc.ARM64.URL, - } - } - if state.ActorTemplate.Spec.Runsc.Authentication.GCP != nil { - authnCfg := &ateletpb.AuthenticationConfig{} - authnCfg.Gcp = &ateletpb.GCPAuthenticationConfig{Use: true} - runscCfg.Authentication = authnCfg - } - + // Checkpoint does not carry the sandbox config: atelet uses the version the + // actor is currently running (recorded on-node at Run/Restore) and pins it + // into the snapshot manifest. 
req := &ateletpb.CheckpointRequest{ TargetAteomUid: state.Actor.GetAteomPodUid(), ActorTemplateNamespace: state.Actor.GetActorTemplateNamespace(), ActorTemplateName: state.Actor.GetActorTemplateName(), ActorId: state.Actor.GetActorId(), - Runsc: runscCfg, Spec: &ateletpb.WorkloadSpec{ PauseImage: state.ActorTemplate.Spec.PauseImage, }, diff --git a/cmd/ateapi/main.go b/cmd/ateapi/main.go index 34101bb12..4d5a8bd37 100644 --- a/cmd/ateapi/main.go +++ b/cmd/ateapi/main.go @@ -113,6 +113,8 @@ func main() { ateFactory := externalversions.NewSharedInformerFactory(ateClient, 0) actorTemplateLister := ateFactory.Api().V1alpha1().ActorTemplates().Lister() + workerPoolLister := ateFactory.Api().V1alpha1().WorkerPools().Lister() + sandboxConfigLister := ateFactory.Api().V1alpha1().SandboxConfigs().Lister() workerPodInformerFactory, workerPodInformer := controlapi.WorkerPodInformer(clientset) ateletPodInformerFactory, ateletPodInformer := controlapi.AteletInformer(clientset) @@ -131,7 +133,7 @@ func main() { ateFactory.WaitForCacheSync(stopCh) dialer := controlapi.NewAteletDialer(workerPodInformer.GetIndexer(), ateletPodInformer.GetIndexer()) - sm := controlapi.NewService(redisPersistence, actorTemplateLister, dialer, clientset) + sm := controlapi.NewService(redisPersistence, actorTemplateLister, workerPoolLister, sandboxConfigLister, dialer, clientset) sessionIdentitySrv := sessionidentity.New(*clientJWTIssuer, *clientJWTAudience, *sessionIDJWTPoolFile, *sessionIDCAPoolFile, *workerpoolCACerts) diff --git a/manifests/ate-install/ate-api-server.yaml b/manifests/ate-install/ate-api-server.yaml index 8d3c17086..6a8fcd6a8 100644 --- a/manifests/ate-install/ate-api-server.yaml +++ b/manifests/ate-install/ate-api-server.yaml @@ -23,7 +23,7 @@ rules: resources: ["pods"] verbs: ["get", "watch", "list"] - apiGroups: ["ate.dev"] - resources: ["actortemplates"] + resources: ["actortemplates", "workerpools", "sandboxconfigs"] verbs: ["get", "watch", "list"] # Secret reads for env source 
resolution are intentionally NOT granted # cluster-wide here. Each demo / tenant is responsible for granting From 42b020251610820c6fdc8341cb70a92dd576338b Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Tue, 16 Jun 2026 17:09:47 -0700 Subject: [PATCH 5/6] feat(install): default gVisor SandboxConfig; drop runsc from demos - Ship a cluster-default gvisor SandboxConfig (gvisor-default) carrying the runsc assets and apply it in install-ate.sh so gVisor pools work out of the box; ensure_crds also checks for the SandboxConfig CRD. - Remove the runsc block (no longer part of the ActorTemplate API) from all demo templates and the e2e probe fixture; they rely on the default SandboxConfig. - e2e demo_test: stop copying ActorTemplate.Runsc; carry the WorkerPool's SandboxClass/SandboxConfigName instead. --- demos/agent-secret/agent-secret.yaml.tmpl | 7 ---- .../claude-code-multiplex.yaml.tmpl | 21 ----------- demos/counter/counter.yaml.tmpl | 7 ---- demos/multi-template/multi-template.yaml.tmpl | 14 -------- demos/sandbox/sandbox.yaml.tmpl | 7 ---- hack/install-ate.sh | 8 ++++- internal/e2e/fixtures/probe/probe.yaml.tmpl | 7 ---- internal/e2e/suites/demo/demo_test.go | 7 ++-- .../ate-install/sandboxconfig-gvisor.yaml | 35 +++++++++++++++++++ 9 files changed, 46 insertions(+), 67 deletions(-) create mode 100644 manifests/ate-install/sandboxconfig-gvisor.yaml diff --git a/demos/agent-secret/agent-secret.yaml.tmpl b/demos/agent-secret/agent-secret.yaml.tmpl index c8d380486..d302f0151 100644 --- a/demos/agent-secret/agent-secret.yaml.tmpl +++ b/demos/agent-secret/agent-secret.yaml.tmpl @@ -60,13 +60,6 @@ metadata: name: agent-secret namespace: ate-demo-secret-agent-v2 spec: - runsc: - amd64: - url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" - sha256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" - arm64: - url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" - sha256Hash: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" pauseImage:
"registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4" containers: - name: agent-secret diff --git a/demos/claude-code-multiplex/claude-code-multiplex.yaml.tmpl b/demos/claude-code-multiplex/claude-code-multiplex.yaml.tmpl index 962055c94..5d77e5fd0 100644 --- a/demos/claude-code-multiplex/claude-code-multiplex.yaml.tmpl +++ b/demos/claude-code-multiplex/claude-code-multiplex.yaml.tmpl @@ -63,13 +63,6 @@ metadata: name: agent-luna namespace: claude-multiplex-demo spec: - runsc: - amd64: - url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" - sha256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" - arm64: - url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" - sha256Hash: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" pauseImage: "registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4" containers: - name: claude @@ -100,13 +93,6 @@ metadata: name: agent-mars namespace: claude-multiplex-demo spec: - runsc: - amd64: - url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" - sha256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" - arm64: - url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" - sha256Hash: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" pauseImage: "registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4" containers: - name: claude @@ -137,13 +123,6 @@ metadata: name: agent-orion namespace: claude-multiplex-demo spec: - runsc: - amd64: - url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" - sha256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" - arm64: - url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" - sha256Hash: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" pauseImage: 
"registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4" containers: - name: claude diff --git a/demos/counter/counter.yaml.tmpl b/demos/counter/counter.yaml.tmpl index 3fa99bbf3..bff6269fb 100644 --- a/demos/counter/counter.yaml.tmpl +++ b/demos/counter/counter.yaml.tmpl @@ -36,13 +36,6 @@ metadata: name: counter namespace: ate-demo-counter spec: - runsc: - amd64: - url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" - sha256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" - arm64: - url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" - sha256Hash: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" pauseImage: "registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4" containers: - name: counter diff --git a/demos/multi-template/multi-template.yaml.tmpl b/demos/multi-template/multi-template.yaml.tmpl index 2882f447e..3b2b149f8 100644 --- a/demos/multi-template/multi-template.yaml.tmpl +++ b/demos/multi-template/multi-template.yaml.tmpl @@ -50,13 +50,6 @@ metadata: name: counter namespace: ate-demo-multi-template-counter spec: - runsc: - amd64: - url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" - sha256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" - arm64: - url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" - sha256Hash: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" pauseImage: "registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4" containers: - name: counter @@ -74,13 +67,6 @@ metadata: name: fspersist namespace: ate-demo-multi-template-fspersist spec: - runsc: - amd64: - url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" - sha256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" - arm64: - url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" - sha256Hash: 
"1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" pauseImage: "registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4" containers: - name: fspersist diff --git a/demos/sandbox/sandbox.yaml.tmpl b/demos/sandbox/sandbox.yaml.tmpl index 70b834f4a..f0fd0cf38 100644 --- a/demos/sandbox/sandbox.yaml.tmpl +++ b/demos/sandbox/sandbox.yaml.tmpl @@ -35,13 +35,6 @@ spec: workerPoolRef: name: sandbox-workerpool namespace: ate-demo-sandbox - runsc: - amd64: - url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" - sha256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" - arm64: - url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" - sha256Hash: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" pauseImage: "registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4" containers: - name: sandbox diff --git a/hack/install-ate.sh b/hack/install-ate.sh index 6c7f46056..35e01fbc1 100755 --- a/hack/install-ate.sh +++ b/hack/install-ate.sh @@ -202,7 +202,7 @@ create_api_server_env_vars() { ensure_crds() { log_step "ensure_crds" - if run_kubectl get crd workerpools.ate.dev actortemplates.ate.dev >/dev/null 2>&1; then + if run_kubectl get crd workerpools.ate.dev actortemplates.ate.dev sandboxconfigs.ate.dev >/dev/null 2>&1; then return fi @@ -218,6 +218,12 @@ deploy_ate_system() { log_step "deploy_ate_system" ensure_crds + # Install the cluster-wide default sandbox config(s). Sandbox binaries live on + # cluster-scoped SandboxConfigs resolved via each WorkerPool's SandboxClass + # (decoupled from ActorTemplate). gVisor pools resolve to this default unless + # they name their own SandboxConfig. 
+ run_kubectl apply -f manifests/ate-install/sandboxconfig-gvisor.yaml + # Ensure namespace exists run_kubectl apply -f manifests/ate-install/ate-system-namespace.yaml \ && run_kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/ate-system --timeout=60s diff --git a/internal/e2e/fixtures/probe/probe.yaml.tmpl b/internal/e2e/fixtures/probe/probe.yaml.tmpl index bd276a5e0..3110104fc 100644 --- a/internal/e2e/fixtures/probe/probe.yaml.tmpl +++ b/internal/e2e/fixtures/probe/probe.yaml.tmpl @@ -36,13 +36,6 @@ metadata: name: probe namespace: ate-e2e-probe spec: - runsc: - amd64: - url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" - sha256Hash: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" - arm64: - url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" - sha256Hash: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" pauseImage: "registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4" containers: - name: probe diff --git a/internal/e2e/suites/demo/demo_test.go b/internal/e2e/suites/demo/demo_test.go index 50224de97..7a11d5d4c 100644 --- a/internal/e2e/suites/demo/demo_test.go +++ b/internal/e2e/suites/demo/demo_test.go @@ -320,8 +320,10 @@ func createActorTemplate(ctx context.Context, t *testing.T, clients *e2e.Clients Namespace: nsObj.Name, }, Spec: v1alpha1.WorkerPoolSpec{ - Replicas: 5, - AteomImage: existingWp.Spec.AteomImage, + Replicas: 5, + AteomImage: existingWp.Spec.AteomImage, + SandboxClass: existingWp.Spec.SandboxClass, + SandboxConfigName: existingWp.Spec.SandboxConfigName, }, } _, err = clients.SubstrateK8s.ApiV1alpha1().WorkerPools(nsObj.Name).Create(ctx, wp, metav1.CreateOptions{}) @@ -340,7 +342,6 @@ func createActorTemplate(ctx context.Context, t *testing.T, clients *e2e.Clients Namespace: nsObj.Name, Name: "counter", }, - Runsc: existingAt.Spec.Runsc, PauseImage: existingAt.Spec.PauseImage, Containers: existingAt.Spec.Containers, 
SnapshotsConfig: v1alpha1.SnapshotsConfig{ diff --git a/manifests/ate-install/sandboxconfig-gvisor.yaml b/manifests/ate-install/sandboxconfig-gvisor.yaml new file mode 100644 index 000000000..62f310d71 --- /dev/null +++ b/manifests/ate-install/sandboxconfig-gvisor.yaml @@ -0,0 +1,35 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Cluster-wide default SandboxConfig for the gVisor (runsc) sandbox class. A +# WorkerPool with sandboxClass gvisor (the default) and no explicit +# sandboxConfigName resolves to this. atelet fetches the runsc binary matching +# the worker node's architecture. To pin a different runsc, edit the assets +# below or create another SandboxConfig and name it from the WorkerPool. +apiVersion: ate.dev/v1alpha1 +kind: SandboxConfig +metadata: + name: gvisor-default +spec: + sandboxClass: gvisor + default: true + assets: + amd64: + runsc: + url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" + sha256: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" + arm64: + runsc: + url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" + sha256: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" From ca581e1d492646ce38388a5456016427fd97d2bb Mon Sep 17 00:00:00 2001 From: Benjamin Elder Date: Tue, 16 Jun 2026 19:13:39 -0700 Subject: [PATCH 6/6] feat(api): validate SandboxConfig assets via VAP The CRD already requires each AssetFile to set url + sha256 (sha256 must be 64 hex). 
Add a ValidatingAdmissionPolicy enforcing the per-class requirement the CRD schema can't express: a gvisor SandboxConfig must define a "runsc" asset for every architecture under spec.assets (fail-closed at apply time, instead of only failing later when an actor tries to boot). Applied before the default SandboxConfig in install-ate.sh so the defaults are validated too. Add an envtest test that loads the shipped policy, waits for it to activate, and covers both the VAP (gvisor must have runsc) and the CRD-level url/sha256 requirements. --- hack/install-ate.sh | 4 + .../ate-install/sandboxconfig-validation.yaml | 46 +++++ .../v1alpha1/sandboxconfig_validation_test.go | 160 ++++++++++++++++++ 3 files changed, 210 insertions(+) create mode 100644 manifests/ate-install/sandboxconfig-validation.yaml create mode 100644 pkg/api/v1alpha1/sandboxconfig_validation_test.go diff --git a/hack/install-ate.sh b/hack/install-ate.sh index 35e01fbc1..5725ab079 100755 --- a/hack/install-ate.sh +++ b/hack/install-ate.sh @@ -218,6 +218,10 @@ deploy_ate_system() { log_step "deploy_ate_system" ensure_crds + # Enforce per-class SandboxConfig asset requirements (applied before any + # SandboxConfig so the defaults below are validated too). + run_kubectl apply -f manifests/ate-install/sandboxconfig-validation.yaml + # Install the cluster-wide default sandbox config(s). Sandbox binaries live on # cluster-scoped SandboxConfigs resolved via each WorkerPool's SandboxClass # (decoupled from ActorTemplate). gVisor pools resolve to this default unless diff --git a/manifests/ate-install/sandboxconfig-validation.yaml b/manifests/ate-install/sandboxconfig-validation.yaml new file mode 100644 index 000000000..9613a9eae --- /dev/null +++ b/manifests/ate-install/sandboxconfig-validation.yaml @@ -0,0 +1,46 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Per-sandbox-class asset requirements for SandboxConfig. The CRD schema is +# generic (any arch -> any asset name -> {url, sha256}); this policy enforces the +# requirements a given sandbox class actually needs, fail-closed at apply time. +# (url/sha256 being required and well-formed is enforced by the CRD schema.) +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicy +metadata: + name: sandboxconfig-assets +spec: + failurePolicy: Fail + matchConstraints: + resourceRules: + - apiGroups: ["ate.dev"] + apiVersions: ["v1alpha1"] + operations: ["CREATE", "UPDATE"] + resources: ["sandboxconfigs"] + validations: + # gVisor needs a "runsc" asset for every architecture it advertises. + # microvm requirements are enforced separately by the micro-VM runtime. 
+ - expression: >- + object.spec.sandboxClass != 'gvisor' || + (has(object.spec.assets) && size(object.spec.assets) > 0 && + object.spec.assets.all(arch, 'runsc' in object.spec.assets[arch])) + message: "a gvisor SandboxConfig must define a 'runsc' asset for every architecture under spec.assets" +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicyBinding +metadata: + name: sandboxconfig-assets +spec: + policyName: sandboxconfig-assets + validationActions: ["Deny"] diff --git a/pkg/api/v1alpha1/sandboxconfig_validation_test.go b/pkg/api/v1alpha1/sandboxconfig_validation_test.go new file mode 100644 index 000000000..632c55263 --- /dev/null +++ b/pkg/api/v1alpha1/sandboxconfig_validation_test.go @@ -0,0 +1,160 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1alpha1 + +import ( + "context" + "fmt" + "os" + "strings" + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" +) + +const validSHA256 = "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" + +// vapManifestPath is the SandboxConfig ValidatingAdmissionPolicy shipped with +// the install — loaded here so the test guards the policy we actually ship. 
+const vapManifestPath = "../../../manifests/ate-install/sandboxconfig-validation.yaml" + +func sandboxConfig(name string, class SandboxClass, assets map[string]map[string]AssetFile) *SandboxConfig { + return &SandboxConfig{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: SandboxConfigSpec{ + SandboxClass: class, + Assets: assets, + }, + } +} + +func runscAsset() AssetFile { return AssetFile{URL: "gs://bucket/runsc", SHA256: validSHA256} } + +// applyVAP installs the shipped ValidatingAdmissionPolicy + binding into the +// envtest API server and waits for the apiserver to actually enforce it (policy +// activation is asynchronous), confirmed by a sentinel that must be denied. +func applyVAP(t *testing.T, ctx context.Context) { + t.Helper() + raw, err := os.ReadFile(vapManifestPath) + if err != nil { + t.Fatalf("read VAP manifest: %v", err) + } + for _, doc := range strings.Split(string(raw), "\n---") { + obj := map[string]any{} + if err := yaml.Unmarshal([]byte(doc), &obj); err != nil { + t.Fatalf("decode VAP doc: %v", err) + } + if len(obj) == 0 { + continue // comment-only / empty document + } + u := &unstructured.Unstructured{Object: obj} + if err := k8sClient.Create(ctx, u); err != nil && !strings.Contains(err.Error(), "already exists") { + t.Fatalf("create %s %q: %v", u.GetKind(), u.GetName(), err) + } + } + + // Wait until the policy is enforced: a gvisor config missing runsc (valid + // per the CRD schema) must be denied. 
+ i := 0 + err = wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, 30*time.Second, true, func(ctx context.Context) (bool, error) { + i++ + sc := sandboxConfig(fmt.Sprintf("vap-warmup-%d", i), SandboxClassGvisor, + map[string]map[string]AssetFile{"amd64": {"notrunsc": runscAsset()}}) + createErr := k8sClient.Create(ctx, sc) + if createErr == nil { + _ = k8sClient.Delete(ctx, sc) // policy not active yet; clean up and retry + return false, nil + } + return strings.Contains(createErr.Error(), "runsc"), nil + }) + if err != nil { + t.Fatalf("VAP did not become active: %v", err) + } +} + +func TestSandboxConfigValidation(t *testing.T) { + ctx := t.Context() + applyVAP(t, ctx) + + tests := []struct { + name string + sc *SandboxConfig + wantErr bool + errMsg string + }{{ + name: "valid gvisor with runsc", + sc: sandboxConfig("ok-gvisor", SandboxClassGvisor, map[string]map[string]AssetFile{"amd64": {"runsc": runscAsset()}, "arm64": {"runsc": runscAsset()}}), + wantErr: false, + }, { + name: "microvm is unconstrained by the gvisor rule", + sc: sandboxConfig("ok-microvm", "microvm", map[string]map[string]AssetFile{"amd64": {"cloud-hypervisor": runscAsset()}}), + wantErr: false, + }, { + name: "gvisor arch missing runsc", + sc: sandboxConfig("bad-no-runsc", SandboxClassGvisor, map[string]map[string]AssetFile{"amd64": {"notrunsc": runscAsset()}}), + wantErr: true, + errMsg: "runsc", + }, { + name: "gvisor one arch missing runsc", + sc: sandboxConfig("bad-mixed-arch", SandboxClassGvisor, map[string]map[string]AssetFile{"amd64": {"runsc": runscAsset()}, "arm64": {"notrunsc": runscAsset()}}), + wantErr: true, + errMsg: "runsc", + }, { + name: "gvisor with no assets", + sc: sandboxConfig("bad-empty", SandboxClassGvisor, nil), + wantErr: true, + errMsg: "runsc", + }, { + name: "asset missing url", + sc: sandboxConfig("bad-no-url", SandboxClassGvisor, map[string]map[string]AssetFile{"amd64": {"runsc": {SHA256: validSHA256}}}), + wantErr: true, + errMsg: "url", + }, { + name: 
"asset missing sha256", + sc: sandboxConfig("bad-no-sha", SandboxClassGvisor, map[string]map[string]AssetFile{"amd64": {"runsc": {URL: "gs://bucket/runsc"}}}), + wantErr: true, + errMsg: "sha256", + }, { + name: "asset sha256 not 64 hex", + sc: sandboxConfig("bad-sha", SandboxClassGvisor, map[string]map[string]AssetFile{"amd64": {"runsc": {URL: "gs://bucket/runsc", SHA256: "deadbeef"}}}), + wantErr: true, + errMsg: "sha256", + }} + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := k8sClient.Create(ctx, tt.sc) + if !tt.wantErr { + if err != nil { + t.Fatalf("Create() unexpected error: %v", err) + } + t.Cleanup(func() { _ = k8sClient.Delete(ctx, tt.sc, &client.DeleteOptions{}) }) + return + } + if err == nil { + _ = k8sClient.Delete(ctx, tt.sc) + t.Fatalf("Create() succeeded, want denied") + } + if tt.errMsg != "" && !strings.Contains(err.Error(), tt.errMsg) { + t.Errorf("Create() error = %q, want it to contain %q", err.Error(), tt.errMsg) + } + }) + } +}