diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32db68f..0b19566 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,47 +29,38 @@ jobs: matrix: go-version: [1.21.x] os: [ubuntu-latest] - architecture: [x32, x64] - name: Generate/Build/Test (${{ matrix.os }}, ${{ matrix.architecture }}, Go ${{ matrix.go-version }}) + architecture: [x64] + name: Build/Test (${{ matrix.os }}, ${{ matrix.architecture }}, Go ${{ matrix.go-version }}) runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 - - uses: actions/setup-go@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} architecture: ${{ matrix.architecture }} + cache: true - name: Build all modules - run: CGO_ENABLED=0 go build -v + run: CGO_ENABLED=0 go build -v ./... - name: Test all modules - run: CGO_ENABLED=0 go test ./deviceplugin/... -v + run: CGO_ENABLED=0 go test ./... -v lint: - strategy: - matrix: - go-version: [1.21.x] - os: [ubuntu-latest] - dir: ["./"] - name: Lint ${{ matrix.dir }} (${{ matrix.os }}, Go ${{ matrix.go-version }}) - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-go@v2 + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 with: - go-version: ${{ matrix.go-version }} + go-version: 1.21.x + cache: true - name: Run golangci-lint - uses: golangci/golangci-lint-action@v3.2.0 + uses: golangci/golangci-lint-action@v4 with: version: latest - working-directory: ${{ matrix.dir }} args: > - -D errcheck + -E errcheck -E stylecheck -E goimports -E misspell -E revive -E gofmt - -E goimports - --exclude-use-default=false - --max-same-issues=0 - --max-issues-per-linter=0 - --timeout 2m + --timeout 5m diff --git a/README.md b/README.md index 66a6cef..8c4548e 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ this plugin deployed in your Kubernetes cluster, you will be able to run jobs * This plugin targets Kubernetes v1.18+. 
## Deployment -The device plugin needs to be run on all the nodes that are equipped with Confidential Computing devices (e.g. TPM). The simplest way of doing so is to create a Kubernetes [DaemonSet][dp], which run a copy of a pod on all (or some) Nodes in the cluster. We have a pre-built Docker image on [Goolge Artifact Registry][release] that you can use for with your DaemonSet. This repository also have a pre-defined yaml file named `cc-device-plugin.yaml`. You can create a DaemonSet in your Kubernetes cluster by running this command: +The device plugin needs to be run on all the nodes that are equipped with Confidential Computing devices (e.g. TPM). The simplest way of doing so is to create a Kubernetes [DaemonSet][dp], which runs a copy of a pod on all (or some) Nodes in the cluster. We have a pre-built Docker image on [Google Artifact Registry][release] that you can use with your DaemonSet. This repository also has a pre-defined yaml file named `cc-device-plugin.yaml`. You can create a DaemonSet in your Kubernetes cluster by running this command: ``` kubectl create -f manifests/cc-device-plugin.yaml diff --git a/deviceplugin/ccdevice.go b/deviceplugin/ccdevice.go index a8b98c2..6987648 100644 --- a/deviceplugin/ccdevice.go +++ b/deviceplugin/ccdevice.go @@ -22,6 +22,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "sync" "time" @@ -32,9 +33,18 @@ import ( ) const ( - deviceCheckInterval = 5 * time.Second - // By default, GKE allows up to 110 Pods per node on Standard clusters. Standard clusters can be configured to allow up to 256 Pods per node. - workloadSharedLimit = 256 + deviceCheckInterval = 5 * time.Second + copiedEventLogDirectory = "/run/cc-device-plugin" + copiedEventLogLocation = "/run/cc-device-plugin/binary_bios_measurements" + containerEventLogDirectory = "/run/cc-device-plugin" +) + +// AttestationType defines if the attestation is based on software emulation or hardware. 
+type AttestationType string + +const ( + SoftwareAttestation AttestationType = "software" // e.g., vTPM + HardwareAttestation AttestationType = "hardware" // e.g., Intel TDX, AMD SEV-SNP ) var ( @@ -47,6 +57,8 @@ type CcDeviceSpec struct { Resource string DevicePaths []string MeasurementPaths []string + DeviceLimit int // Number of allocatable instances of this resource + Type AttestationType // New flag to explicitly define the device type } // CcDevice wraps the v1.beta1.Device type, which has hostPath, containerPath and permission @@ -54,18 +66,16 @@ type CcDevice struct { v1beta1.Device DeviceSpecs []*v1beta1.DeviceSpec Mounts []*v1beta1.Mount - // Limit specifies the cap number of workloads sharing a worker node - Limit int } // CcDevicePlugin is a device plugin for cc devices type CcDevicePlugin struct { cds *CcDeviceSpec ccDevices map[string]CcDevice + logger log.Logger copiedEventLogDirectory string copiedEventLogLocation string containerEventLogDirectory string - logger log.Logger // this lock prevents data race when kubelet sends multiple requests at the same time mu sync.Mutex @@ -79,14 +89,17 @@ func NewCcDevicePlugin(cds *CcDeviceSpec, devicePluginPath string, socket string if logger == nil { logger = log.NewNopLogger() } + if cds.DeviceLimit <= 0 { + cds.DeviceLimit = 1 // Default to 1 if not specified + } cdp := &CcDevicePlugin{ cds: cds, ccDevices: make(map[string]CcDevice), logger: logger, - copiedEventLogDirectory: "/run/cc-device-plugin", - copiedEventLogLocation: "/run/cc-device-plugin/binary_bios_measurements", - containerEventLogDirectory: "/run/cc-device-plugin", + copiedEventLogDirectory: copiedEventLogDirectory, + copiedEventLogLocation: copiedEventLogLocation, // Note: This path is static, used only by vTPM plugin instance. 
+ containerEventLogDirectory: containerEventLogDirectory, deviceGauge: prometheus.NewGauge(prometheus.GaugeOpts{ Name: "cc_device_plugin_devices", Help: "The number of cc devices managed by this device plugin.", @@ -97,16 +110,19 @@ func NewCcDevicePlugin(cds *CcDeviceSpec, devicePluginPath string, socket string }), } - // Check if the copiedEventLogDirectory directory exists - if _, err := os.Stat(cdp.copiedEventLogDirectory); os.IsNotExist(err) { - // Create the directory - err = os.Mkdir(cdp.copiedEventLogDirectory, 0755) - if err != nil { - return nil, err + // Only create the directory if the device type is software-based (e.g., vTPM), + // as hardware-based devices (TDX/SNP) do not require copying measurement files to /run. + if cdp.cds.Type == SoftwareAttestation { + if _, err := os.Stat(cdp.copiedEventLogDirectory); os.IsNotExist(err) { + // Create the directory + err = os.MkdirAll(cdp.copiedEventLogDirectory, 0755) + if err != nil { + return nil, err + } + _ = level.Info(cdp.logger).Log("msg", "Directory created:"+cdp.copiedEventLogDirectory) + } else { + _ = level.Info(cdp.logger).Log("msg", "Directory already exists:"+cdp.copiedEventLogDirectory) } - level.Info(cdp.logger).Log("msg", "Directory created:"+cdp.copiedEventLogDirectory) - } else { - level.Info(cdp.logger).Log("msg", "Directory already exists:"+cdp.copiedEventLogDirectory) } if reg != nil { @@ -118,75 +134,109 @@ func NewCcDevicePlugin(cds *CcDeviceSpec, devicePluginPath string, socket string func (cdp *CcDevicePlugin) discoverCcDevices() ([]CcDevice, error) { var ccDevices []CcDevice - cd := CcDevice{ - Device: v1beta1.Device{ - Health: v1beta1.Healthy, - }, - // set cap - Limit: workloadSharedLimit, - } - h := sha1.New() + var foundDevicePaths []string + + // We use foundDevicePaths as an accumulator because a single resource (like TDX) + // might be represented by multiple device path patterns. 
for _, path := range cdp.cds.DevicePaths { matches, err := filepath.Glob(path) if err != nil { return nil, err } - for _, matchPath := range matches { - level.Info(cdp.logger).Log("msg", "device path found:"+matchPath) - cd.DeviceSpecs = append(cd.DeviceSpecs, &v1beta1.DeviceSpec{ - HostPath: matchPath, - ContainerPath: matchPath, - Permissions: "mrw", - }) + if len(matches) > 0 { + _ = level.Info(cdp.logger).Log("msg", "found matching device path(s)", "pattern", path, "matches", strings.Join(matches, ",")) + foundDevicePaths = append(foundDevicePaths, matches...) } } - for _, path := range cdp.cds.MeasurementPaths { - matches, err := filepath.Glob(path) - if err != nil { - return nil, err + // If no device paths were found for this resource type, simply return an empty list. + // This is not an error; the node just doesn't have this specific hardware. + if len(foundDevicePaths) == 0 { + return nil, nil + } + + baseDevice := CcDevice{ + Device: v1beta1.Device{ + Health: v1beta1.Healthy, + }, + } + + for _, matchPath := range foundDevicePaths { + baseDevice.DeviceSpecs = append(baseDevice.DeviceSpecs, &v1beta1.DeviceSpec{ + HostPath: matchPath, + ContainerPath: matchPath, + Permissions: "mrw", + }) + } + + // Measurement files are currently only expected for software-emulated devices (vTPM). 
+ if cdp.cds.Type == SoftwareAttestation && len(cdp.cds.MeasurementPaths) > 0 { + var foundMeasurementPath string + for _, path := range cdp.cds.MeasurementPaths { + matches, err := filepath.Glob(path) + if err != nil { + return nil, err + } + if len(matches) > 0 { + // We only expect one measurement file + foundMeasurementPath = matches[0] + _ = level.Info(cdp.logger).Log("msg", "measurement path found", "path", foundMeasurementPath) + break + } } - for _, matchPath := range matches { - level.Info(cdp.logger).Log("msg", "measurement path found:"+matchPath) - cd.Mounts = append(cd.Mounts, &v1beta1.Mount{ + if foundMeasurementPath != "" { + baseDevice.Mounts = append(baseDevice.Mounts, &v1beta1.Mount{ HostPath: cdp.copiedEventLogDirectory, ContainerPath: cdp.containerEventLogDirectory, ReadOnly: true, }) - // copy when no measurement file at copiedEventLogLocation fileInfo, err := os.Stat(cdp.copiedEventLogLocation) if errors.Is(err, os.ErrNotExist) { - err := copyMeasurementFile(matchPath, cdp.copiedEventLogLocation) - if err != nil { + if err := copyMeasurementFile(foundMeasurementPath, cdp.copiedEventLogLocation); err != nil { + _ = level.Error(cdp.logger).Log("msg", "failed to copy measurement file", "error", err) return nil, err } - } else { - // copy when measurement file at /run was updated, but not by the current instance. - // measurementFileLastUpdate is init to 0. - // when file exists during first run, this instance deletes and creates a new file - if fileInfo.ModTime().After(measurementFileLastUpdate) { - err := copyMeasurementFile(matchPath, cdp.copiedEventLogLocation) - if err != nil { - return nil, err - } + } else if err == nil && fileInfo.ModTime().After(measurementFileLastUpdate) { + // Refresh the copy if the source file has been updated by the kernel since the last copy. 
+ if err := copyMeasurementFile(foundMeasurementPath, cdp.copiedEventLogLocation); err != nil { + _ = level.Error(cdp.logger).Log("msg", "failed to re-copy measurement file", "error", err) + return nil, err } + } else if err != nil { + _ = level.Error(cdp.logger).Log("msg", "failed to stat copied measurement file", "error", err) + return nil, err } + } else { + _ = level.Warn(cdp.logger).Log("msg", "MeasurementPaths specified but no measurement file found", "paths", strings.Join(cdp.cds.MeasurementPaths, ",")) } } - if cd.DeviceSpecs != nil { - for i := 0; i < cd.Limit; i++ { - b := make([]byte, 1) - b[0] = byte(i) - cd.ID = fmt.Sprintf("%x", h.Sum(b)) - ccDevices = append(ccDevices, cd) + + // Create DeviceLimit instances of the device + h := sha1.New() + h.Write([]byte(cdp.cds.Resource)) + baseID := fmt.Sprintf("%x", h.Sum(nil)) + + for i := 0; i < cdp.cds.DeviceLimit; i++ { + cd := baseDevice // Copy the base structure + // For single-limit devices, ID is baseID. For multi-limit, append index. + if cdp.cds.DeviceLimit > 1 { + cd.ID = fmt.Sprintf("%s-%d", baseID, i) + } else { + cd.ID = baseID } + ccDevices = append(ccDevices, cd) } return ccDevices, nil } func copyMeasurementFile(src string, dest string) error { + // get time for src + sourceInfo, err := os.Stat(src) + if err != nil { + return err + } // copy out measurement eventlogFile, err := os.ReadFile(src) if err != nil { @@ -201,11 +251,7 @@ func copyMeasurementFile(src string, dest string) error { if err != nil { return err } - fileInfo, err := os.Stat(dest) - if err != nil { - return err - } - measurementFileLastUpdate = fileInfo.ModTime() + measurementFileLastUpdate = sourceInfo.ModTime() return nil } @@ -235,18 +281,28 @@ func (cdp *CcDevicePlugin) refreshDevices() (bool, error) { devicesUnchange = false } } - if !devicesUnchange { - return false, nil + if len(ccDevices) != len(old) { + devicesUnchange = false + } + + if devicesUnchange { + return true, nil } - // Check if devices were removed. 
+ // Log if devices were removed for k := range old { if _, ok := cdp.ccDevices[k]; !ok { - level.Warn(cdp.logger).Log("msg", "devices removed") - return false, nil + _ = level.Info(cdp.logger).Log("msg", "device removed", "id", k) } } - return true, nil + // Log if devices were added + for k := range cdp.ccDevices { + if _, ok := old[k]; !ok { + _ = level.Info(cdp.logger).Log("msg", "device added", "id", k) + } + } + + return false, nil } // Allocate assigns cc devices to a Pod. @@ -267,19 +323,18 @@ func (cdp *CcDevicePlugin) Allocate(_ context.Context, req *v1beta1.AllocateRequ if ccDevice.Health != v1beta1.Healthy { return nil, fmt.Errorf("requested cc device is not healthy %q", id) } - level.Info(cdp.logger).Log("msg", "adding device and measurement to Pod, device id is:"+id) + _ = level.Info(cdp.logger).Log("msg", "adding device and measurement to Pod", "device id", id) for _, ds := range ccDevice.DeviceSpecs { - level.Info(cdp.logger).Log("msg", "added ccDevice.deviceSpecs is:"+ds.String()) + _ = level.Debug(cdp.logger).Log("msg", "added ccDevice.deviceSpecs", "spec", ds.String()) } for _, dm := range ccDevice.Mounts { - level.Info(cdp.logger).Log("msg", "added ccDevice.mounts is:"+dm.String()) + _ = level.Debug(cdp.logger).Log("msg", "added ccDevice.mounts", "mount", dm.String()) } resp.Devices = append(resp.Devices, ccDevice.DeviceSpecs...) resp.Mounts = append(resp.Mounts, ccDevice.Mounts...) - } res.ContainerResponses = append(res.ContainerResponses, resp) } @@ -294,27 +349,30 @@ func (cdp *CcDevicePlugin) GetDevicePluginOptions(_ context.Context, _ *v1beta1. // ListAndWatch lists all devices and then refreshes every deviceCheckInterval. 
func (cdp *CcDevicePlugin) ListAndWatch(_ *v1beta1.Empty, stream v1beta1.DevicePlugin_ListAndWatchServer) error { - level.Info(cdp.logger).Log("msg", "starting list and watch") + _ = level.Info(cdp.logger).Log("msg", "starting list and watch") if _, err := cdp.refreshDevices(); err != nil { return err } - refreshComplete := false - var err error + for { - if !refreshComplete { - res := new(v1beta1.ListAndWatchResponse) - for _, dev := range cdp.ccDevices { - res.Devices = append(res.Devices, &v1beta1.Device{ID: dev.ID, Health: dev.Health}) - } - if err := stream.Send(res); err != nil { - return err - } + res := new(v1beta1.ListAndWatchResponse) + cdp.mu.Lock() + for _, dev := range cdp.ccDevices { + res.Devices = append(res.Devices, &v1beta1.Device{ID: dev.ID, Health: dev.Health}) } - <-time.After(deviceCheckInterval) - refreshComplete, err = cdp.refreshDevices() - if err != nil { + cdp.mu.Unlock() + + if err := stream.Send(res); err != nil { + _ = level.Error(cdp.logger).Log("msg", "failed to send ListAndWatchResponse", "error", err) return err } + + <-time.After(deviceCheckInterval) + + if _, err := cdp.refreshDevices(); err != nil { + _ = level.Error(cdp.logger).Log("msg", "error during device refresh", "error", err) + // Don't return error immediately, try to continue + } } } diff --git a/deviceplugin/ccdevice_test.go b/deviceplugin/ccdevice_test.go index aebba20..d08e4b8 100644 --- a/deviceplugin/ccdevice_test.go +++ b/deviceplugin/ccdevice_test.go @@ -20,13 +20,12 @@ import ( "errors" "fmt" "os" + "path/filepath" "testing" "time" "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" "github.com/oklog/run" "github.com/prometheus/client_golang/prometheus" "google.golang.org/grpc/metadata" @@ -34,8 +33,7 @@ import ( ) const ( - ccResourceName = namespace + "/testccdevicetype" - testBuffer = 3 * time.Second + testBuffer = 3 * time.Second ) var ( @@ -44,234 +42,229 @@ var ( func init() { 
logger = log.NewJSONLogger(log.NewSyncWriter(os.Stdout)) - logger = level.NewFilter(logger, level.AllowInfo()) + logger = level.NewFilter(logger, level.AllowAll()) logger = log.With(logger, "timestamp", log.DefaultTimestampUTC) logger = log.With(logger, "caller", log.DefaultCaller) - } -func constructCcDevicePlugin(t *testing.T) *CcDevicePlugin { - ccDevicePath := "/tmp/testccdevice" + t.Name() - ccMeasurmentPath := "/tmp/testmeasurement" + t.Name() +// constructTestPlugin creates a *CcDevicePlugin using a temporary directory for isolation. +func constructTestPlugin(t *testing.T, spec *CcDeviceSpec) *CcDevicePlugin { + t.Helper() + tmpDir := t.TempDir() - ccDevicePaths := []string{ccDevicePath} - ccMeasurmentPaths := []string{ccMeasurmentPath} + // Create dummy device files + for idx, path := range spec.DevicePaths { + absPath := filepath.Join(tmpDir, path) + if err := os.MkdirAll(filepath.Dir(absPath), 0755); err != nil { + t.Fatalf("failed to create dir: %v", err) + } + if err := os.WriteFile(absPath, []byte("test_device"), 0644); err != nil { + t.Fatalf("failed to create mock device: %v", err) + } + spec.DevicePaths[idx] = absPath + } - ccDeviceSpec := &CcDeviceSpec{ - Resource: ccResourceName, - DevicePaths: ccDevicePaths, - MeasurementPaths: ccMeasurmentPaths, + // Create dummy measurement files + for idx, path := range spec.MeasurementPaths { + absPath := filepath.Join(tmpDir, path) + if err := os.MkdirAll(filepath.Dir(absPath), 0755); err != nil { + t.Fatalf("failed to create dir: %v", err) + } + if err := os.WriteFile(absPath, []byte("test_measurement"), 0644); err != nil { + t.Fatalf("failed to create mock measurement: %v", err) + } + spec.MeasurementPaths[idx] = absPath } - testCcDevicePlugin := CcDevicePlugin{ - cds: ccDeviceSpec, + cdp := &CcDevicePlugin{ + cds: spec, ccDevices: make(map[string]CcDevice), - copiedEventLogDirectory: "/tmp/cc-device-plugin", - copiedEventLogLocation: "/tmp/cc-device-plugin/run_testcopiedmeasurement" + t.Name(), - 
containerEventLogDirectory: "/run/cc-device-plugin", logger: logger, + copiedEventLogDirectory: filepath.Join(tmpDir, "run/cc-device-plugin"), + copiedEventLogLocation: filepath.Join(tmpDir, "run/cc-device-plugin/binary_bios_measurements"), + containerEventLogDirectory: "/run/cc-device-plugin", deviceGauge: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "cc_device_plugin_devices", - Help: "The number of cc devices managed by this device plugin.", + Name: "test_cc_devices_" + t.Name(), }), allocationsCounter: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "cc_device_plugin_allocations_total", - Help: "The total number of cc device allocations made by this device plugin.", + Name: "test_cc_allocations_" + t.Name(), }), } - // Check if the copiedEventLogDirectory directory exists - if _, err := os.Stat(testCcDevicePlugin.copiedEventLogDirectory); os.IsNotExist(err) { - // Create the directory - err = os.Mkdir(testCcDevicePlugin.copiedEventLogDirectory, 0755) - if err != nil { - level.Warn(testCcDevicePlugin.logger).Log("msg", "Error creating directory:"+testCcDevicePlugin.copiedEventLogDirectory) - t.Errorf("failed to create directory: %v", err) + // For SoftwareAttestation, we expect the directory to be created + if spec.Type == SoftwareAttestation { + if err := os.MkdirAll(cdp.copiedEventLogDirectory, 0755); err != nil { + t.Fatalf("failed to create directory: %v", err) } - level.Info(testCcDevicePlugin.logger).Log("msg", "Directory created:"+testCcDevicePlugin.copiedEventLogDirectory) - } else { - level.Info(testCcDevicePlugin.logger).Log("msg", "Directory already exists:"+testCcDevicePlugin.copiedEventLogDirectory) } - for _, ccDevicePath := range ccDevicePaths { - os.Remove(ccDevicePath) - err := os.WriteFile(ccDevicePath, []byte("TestCcDevice"), 0777) - if err != nil { - t.Errorf("failed to WriteFile: %v", err) - } - } - for _, ccMeasurmentPath := range ccMeasurmentPaths { - os.Remove(ccMeasurmentPath) - err := os.WriteFile(ccMeasurmentPath, 
[]byte("TestCcDevice"), 0777) - if err != nil { - t.Errorf("failed to WriteFile: %v", err) - } - } + return cdp +} - os.Remove(testCcDevicePlugin.copiedEventLogLocation) - return &testCcDevicePlugin +func getExpectedID(resourceName string, limit int, index int) string { + h := sha1.New() + h.Write([]byte(resourceName)) + baseID := fmt.Sprintf("%x", h.Sum(nil)) + if limit > 1 { + return fmt.Sprintf("%s-%d", baseID, index) + } + return baseID } -func TestDiscoverCcDevices(t *testing.T) { - testCcDevicePlugin := constructCcDevicePlugin(t) - gotCcDevices, err := testCcDevicePlugin.discoverCcDevices() - if err != nil { - t.Errorf("failed to discoverCcDevices: %v", err) - return +func TestDiscoverTDX(t *testing.T) { + spec := &CcDeviceSpec{ + Resource: "intel.com/tdx", + Type: HardwareAttestation, + DevicePaths: []string{"dev/tdx-guest"}, + DeviceLimit: 1, } - // discoverCcDevices copies measurement file, delete after test. - err = os.Remove(testCcDevicePlugin.copiedEventLogLocation) + cdp := constructTestPlugin(t, spec) + devices, err := cdp.discoverCcDevices() if err != nil { - t.Errorf("failed to delete: %v", err) - return - } - - wantCcDevice := CcDevice{ - Device: v1beta1.Device{ - Health: v1beta1.Healthy, - }, - DeviceSpecs: []*v1beta1.DeviceSpec{{ - HostPath: testCcDevicePlugin.cds.DevicePaths[0], - ContainerPath: testCcDevicePlugin.cds.DevicePaths[0], - Permissions: "mrw", - }}, - Mounts: []*v1beta1.Mount{{ - HostPath: testCcDevicePlugin.copiedEventLogDirectory, - ContainerPath: testCcDevicePlugin.containerEventLogDirectory, - ReadOnly: true, - }}, - Limit: workloadSharedLimit, + t.Fatalf("discoverCcDevices failed: %v", err) } - var wantCcDevices []CcDevice - for i := 0; i < wantCcDevice.Limit; i++ { - wantCcDevices = append(wantCcDevices, wantCcDevice) + if len(devices) != 1 { + t.Fatalf("Expected 1 device, got %d", len(devices)) } - - if !cmp.Equal(gotCcDevices, wantCcDevices, cmpopts.IgnoreFields(v1beta1.Device{}, "ID")) { - t.Errorf("ccDevices do not match 
expected value: got %v, want %v", gotCcDevices, wantCcDevices) + // Hardware-based should NOT have mounts + if len(devices[0].Mounts) != 0 { + t.Errorf("TDX should have 0 mounts, got %d", len(devices[0].Mounts)) } } -func TestDiscoverCcDevicesPermissionFailure(t *testing.T) { - testCcDevicePlugin := constructCcDevicePlugin(t) - testCcDevicePlugin.copiedEventLogDirectory = "/tmp/cc-device-plugin" - testCcDevicePlugin.copiedEventLogLocation = "/tmp/cc-device-plugin/run_testcopiedmeasurement" + t.Name() - _, err := testCcDevicePlugin.discoverCcDevices() - if err != nil && !errors.Is(err, os.ErrPermission) { - t.Errorf("failed to discoverCcDevices: %v", err) - return +func TestDiscoverSEVSNP(t *testing.T) { + spec := &CcDeviceSpec{ + Resource: "amd.com/sev-snp", + Type: HardwareAttestation, + DevicePaths: []string{"dev/sev-guest"}, + DeviceLimit: 1, + } + cdp := constructTestPlugin(t, spec) + devices, err := cdp.discoverCcDevices() + if err != nil { + t.Fatalf("discoverCcDevices failed: %v", err) + } + + if len(devices) != 1 { + t.Fatalf("Expected 1 device, got %d", len(devices)) + } + if len(devices[0].Mounts) != 0 { + t.Errorf("SEV-SNP should have 0 mounts, got %d", len(devices[0].Mounts)) } } -func TestRefreshDevices(t *testing.T) { - testCcDevicePlugin := constructCcDevicePlugin(t) - // first time - wantSameCcDeviceMap := false - gotSameCcDeviceMap, err := testCcDevicePlugin.refreshDevices() +func TestDiscoverTPM(t *testing.T) { + spec := &CcDeviceSpec{ + Resource: "google.com/cc", + Type: SoftwareAttestation, + DevicePaths: []string{"dev/tpmrm0"}, + MeasurementPaths: []string{"sys/binary_bios_measurements"}, + DeviceLimit: 256, + } + cdp := constructTestPlugin(t, spec) + devices, err := cdp.discoverCcDevices() if err != nil { - t.Errorf("refreshDevices failed") + t.Fatalf("discoverCcDevices failed: %v", err) } - if gotSameCcDeviceMap != wantSameCcDeviceMap { - t.Errorf("first time refreshDevices return does not match expected value: got %v, want %v", 
gotSameCcDeviceMap, wantSameCcDeviceMap) + + if len(devices) != 256 { + t.Fatalf("Expected 256 devices, got %d", len(devices)) } - wantNumOfCcDevices := workloadSharedLimit - gotNumOfCcDevices := len(testCcDevicePlugin.ccDevices) - if len(testCcDevicePlugin.ccDevices) != wantNumOfCcDevices { - t.Errorf("first time refreshDevices map ccdevices does not match expected value: got %v, want %v", gotNumOfCcDevices, wantNumOfCcDevices) + + // Software-based (vTPM) SHOULD have mounts + if len(devices[0].Mounts) == 0 { + t.Errorf("TPM should have mounts for event log copying") } - os.Remove(testCcDevicePlugin.copiedEventLogLocation) - // second time - wantSameCcDeviceMap = true - gotSameCcDeviceMap, err = testCcDevicePlugin.refreshDevices() - if err != nil { - t.Errorf("refreshDevices failed") + // Verify file was actually copied to the temporary "run" dir + if _, err := os.Stat(cdp.copiedEventLogLocation); err != nil { + t.Errorf("Measurement file was not copied to target location: %v", err) } - if gotSameCcDeviceMap != wantSameCcDeviceMap { - t.Errorf("second time refreshDevices return does not match expected value: got %v, want %v", gotSameCcDeviceMap, wantSameCcDeviceMap) +} + +func TestRefreshDevices(t *testing.T) { + spec := &CcDeviceSpec{ + Resource: "intel.com/tdx", + Type: HardwareAttestation, + DevicePaths: []string{"dev/tdx-guest"}, + DeviceLimit: 1, } - os.Remove(testCcDevicePlugin.copiedEventLogLocation) + cdp := constructTestPlugin(t, spec) + devPath := spec.DevicePaths[0] - // third time remove ccDeivces - wantSameCcDeviceMap = false - ccDevicePath := "/tmp/testccdevice" + t.Name() - ccMeasurmentPath := "/tmp/testmeasurement" + t.Name() - os.Remove(ccDevicePath) - os.Remove(ccMeasurmentPath) + // 1. 
Initial Refresh + changed, err := cdp.refreshDevices() + if err != nil || changed { + t.Errorf("First refresh: err=%v, changed=%v (want false)", err, changed) + } - gotSameCcDeviceMap, err = testCcDevicePlugin.refreshDevices() - if err != nil { - t.Errorf("refreshDevices failed") + // 2. Second Refresh (No change) + changed, err = cdp.refreshDevices() + if err != nil || !changed { + t.Errorf("Second refresh: err=%v, changed=%v (want true)", err, changed) } - if gotSameCcDeviceMap != wantSameCcDeviceMap { - t.Errorf("third time refreshDevices return does not match expected value: got %v, want %v", gotSameCcDeviceMap, wantSameCcDeviceMap) + + // 3. Remove device and refresh + os.Remove(devPath) + changed, err = cdp.refreshDevices() + if err != nil || changed { + t.Errorf("Third refresh (removed): err=%v, changed=%v (want false)", err, changed) + } + if len(cdp.ccDevices) != 0 { + t.Errorf("Expected 0 devices, got %d", len(cdp.ccDevices)) } - os.Remove(testCcDevicePlugin.copiedEventLogLocation) } func TestAllocate(t *testing.T) { - testCcDevicePlugin := constructCcDevicePlugin(t) - _, err := testCcDevicePlugin.refreshDevices() - if err != nil { - t.Errorf("refreshDevices failed") + spec := &CcDeviceSpec{ + Resource: "google.com/cc", + Type: SoftwareAttestation, + DevicePaths: []string{"dev/tpmrm0"}, + MeasurementPaths: []string{"sys/binary_bios_measurements"}, + DeviceLimit: 2, + } + cdp := constructTestPlugin(t, spec) + if _, err := cdp.refreshDevices(); err != nil { + t.Fatalf("refreshDevices failed: %v", err) } ctx := context.Background() - h := sha1.New() - b := make([]byte, 1) - - for i := 0; i < workloadSharedLimit; i++ { - b[0] = byte(i) - req := &v1beta1.AllocateRequest{ - ContainerRequests: []*v1beta1.ContainerAllocateRequest{{ - DevicesIDs: []string{fmt.Sprintf("%x", h.Sum(b))}, - }}, - } - gotRes, err := testCcDevicePlugin.Allocate(ctx, req) - if err != nil { - t.Errorf("Allocate failed") - } + expectedID := getExpectedID(spec.Resource, spec.DeviceLimit, 0) 
- ccDevicePath := "/tmp/testccdevice" + t.Name() - wantRes := &v1beta1.AllocateResponse{ - ContainerResponses: []*v1beta1.ContainerAllocateResponse{{ - Devices: []*v1beta1.DeviceSpec{{ - ContainerPath: ccDevicePath, - HostPath: ccDevicePath, - Permissions: "mrw", - }}, - Mounts: []*v1beta1.Mount{{ - ContainerPath: testCcDevicePlugin.containerEventLogDirectory, - HostPath: testCcDevicePlugin.copiedEventLogDirectory, - ReadOnly: true, - }}, - }}, - } + req := &v1beta1.AllocateRequest{ + ContainerRequests: []*v1beta1.ContainerAllocateRequest{{ + DevicesIDs: []string{expectedID}, + }}, + } - if !cmp.Equal(gotRes, wantRes) { - t.Errorf("AllocateResponse does not match expected value: got %v, want %v", gotRes, wantRes) - } + resp, err := cdp.Allocate(ctx, req) + if err != nil { + t.Fatalf("Allocate failed: %v", err) + } + + if len(resp.ContainerResponses) != 1 { + t.Fatalf("Expected 1 response, got %d", len(resp.ContainerResponses)) + } + + // Verify the response contains the mount for software attestation + if len(resp.ContainerResponses[0].Mounts) == 0 { + t.Errorf("Expected mount in AllocateResponse for software attestation") } } func TestAllocateNotExistDevice(t *testing.T) { - notExsitDeviceName := "NotExistDevice" - testCcDevicePlugin := constructCcDevicePlugin(t) - _, err := testCcDevicePlugin.refreshDevices() - if err != nil { - t.Errorf("refreshDevices failed") - } + spec := &CcDeviceSpec{Resource: "test", Type: HardwareAttestation} + cdp := constructTestPlugin(t, spec) - ctx := context.Background() req := &v1beta1.AllocateRequest{ ContainerRequests: []*v1beta1.ContainerAllocateRequest{{ - DevicesIDs: []string{notExsitDeviceName}, + DevicesIDs: []string{"NonExistentID"}, }}, } - _, err = testCcDevicePlugin.Allocate(ctx, req) - if err.Error() != "requested cc device does not exist \""+notExsitDeviceName+"\"" { - t.Errorf("Allocate failed") + _, err := cdp.Allocate(context.Background(), req) + if err == nil { + t.Fatal("expected error for non-existent device, got 
nil") } } @@ -281,83 +274,56 @@ type listAndWatchServerStub struct { func (d *listAndWatchServerStub) Send(*v1beta1.ListAndWatchResponse) error { if d.testComplete { - return errors.New("") + return errors.New("test complete") } return nil } -func (d *listAndWatchServerStub) SetTestComplete() { - d.testComplete = true -} - -func (d *listAndWatchServerStub) SetHeader(metadata.MD) error { - return nil -} - -func (d *listAndWatchServerStub) SendHeader(metadata.MD) error { - return nil -} - -func (d *listAndWatchServerStub) SetTrailer(metadata.MD) { -} +func (d *listAndWatchServerStub) SetTestComplete() { d.testComplete = true } +func (d *listAndWatchServerStub) SetHeader(metadata.MD) error { return nil } +func (d *listAndWatchServerStub) SendHeader(metadata.MD) error { return nil } +func (d *listAndWatchServerStub) SetTrailer(metadata.MD) { /* no-op for testing */ } +func (d *listAndWatchServerStub) Context() context.Context { return context.Background() } +func (d *listAndWatchServerStub) SendMsg(any) error { return nil } +func (d *listAndWatchServerStub) RecvMsg(any) error { return nil } -func (d *listAndWatchServerStub) Context() context.Context { - return context.Background() -} - -func (d *listAndWatchServerStub) SendMsg(any) error { - return nil -} - -func (d *listAndWatchServerStub) RecvMsg(any) error { - return nil -} - -// The ListAndWatch function does not stop when no error. We use a timer to stop the -// ListAndWatch function when no error. The ListAndWatch function refresh devices every -// deviceCheckInterval. So the timer waits for deviceCheckInterval. We add a testBuffer -// to timer in case the timer ends before devices are refreshed. 
func TestListAndWatch(t *testing.T) { - testCcDevicePlugin := constructCcDevicePlugin(t) - + spec := &CcDeviceSpec{ + Resource: "intel.com/tdx", + Type: HardwareAttestation, + DevicePaths: []string{"dev/tdx-guest"}, + DeviceLimit: 1, + } + cdp := constructTestPlugin(t, spec) stream := listAndWatchServerStub{} - - endSignal := make(chan int) + endSignal := make(chan struct{}) var g run.Group { g.Add(func() error { - for { - select { - case <-endSignal: - return nil - // no error. - case <-time.After(deviceCheckInterval + testBuffer): - stream.SetTestComplete() - ccDevicePath := "/tmp/testccdevice" + t.Name() - ccMeasurmentPath := "/tmp/testmeasurement" + t.Name() - os.Remove(ccDevicePath) - os.Remove(ccMeasurmentPath) - return nil - } + select { + case <-endSignal: + return nil + case <-time.After(deviceCheckInterval + testBuffer): + stream.SetTestComplete() + os.Remove(spec.DevicePaths[0]) + return nil } }, func(error) {}) } { g.Add(func() error { - err := testCcDevicePlugin.ListAndWatch(&v1beta1.Empty{}, &stream) - if err != nil { - if err.Error() != "" { - t.Errorf("ListAndWatch failed") - endSignal <- 0 - } else { - return nil - } + err := cdp.ListAndWatch(&v1beta1.Empty{}, &stream) + if err != nil && err.Error() != "test complete" { + t.Errorf("ListAndWatch failed: %v", err) + close(endSignal) } - return err + return nil }, func(error) {}) } - g.Run() + if err := g.Run(); err != nil && err.Error() != "test complete" { + t.Errorf("run group failed: %v", err) + } } diff --git a/deviceplugin/plugin.go b/deviceplugin/plugin.go index a330c81..cb49076 100644 --- a/deviceplugin/plugin.go +++ b/deviceplugin/plugin.go @@ -102,7 +102,7 @@ Outer: err := p.runOnce(ctx) if err != nil { lastErrorTime = time.Now() - level.Warn(p.logger).Log("msg", "encountered error while running plugin", "err", err) + _ = level.Warn(p.logger).Log("msg", "encountered error while running plugin", "err", err) select { case <-ctx.Done(): break Outer @@ -129,7 +129,7 @@ Outer: // This makes it 
convenient to run in a run.Group. func (p *plugin) serve(ctx context.Context) (func() error, func(error), error) { // Run the gRPC server. - level.Info(p.logger).Log("msg", "listening on Unix socket", "socket", p.socket) + _ = level.Info(p.logger).Log("msg", "listening on Unix socket", "socket", p.socket) l, err := net.Listen("unix", p.socket) if err != nil { return nil, nil, fmt.Errorf("failed to listen on Unix socket %q: %v", p.socket, err) @@ -137,7 +137,7 @@ func (p *plugin) serve(ctx context.Context) (func() error, func(error), error) { ch := make(chan error) go func() { - level.Info(p.logger).Log("msg", "starting gRPC server") + _ = level.Info(p.logger).Log("msg", "starting gRPC server") ch <- p.grpcServer.Serve(l) close(ch) }() @@ -148,7 +148,7 @@ Outer: for range p.grpcServer.GetServiceInfo() { break Outer } - level.Info(p.logger).Log("msg", "waiting for gRPC server to be ready") + _ = level.Info(p.logger).Log("msg", "waiting for gRPC server to be ready") select { case <-ctx.Done(): return nil, nil, ctx.Err() @@ -164,7 +164,7 @@ Outer: // Drain the channel to clean up. 
<-ch if err := l.Close(); err != nil { - level.Warn(p.logger).Log("msg", "encountered error while closing the listener", "err", err) + _ = level.Warn(p.logger).Log("msg", "encountered error while closing the listener", "err", err) } }, nil } @@ -190,7 +190,7 @@ func (p *plugin) runOnce(ctx context.Context) error { ctx, cancel := context.WithCancel(ctx) g.Add(func() error { defer cancel() - level.Info(p.logger).Log("msg", "waiting for the gRPC server to be ready") + _ = level.Info(p.logger).Log("msg", "waiting for the gRPC server to be ready") c, err := grpc.DialContext(ctx, p.socket, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock(), grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) { return (&net.Dialer{}).DialContext(ctx, "unix", addr) @@ -202,11 +202,11 @@ func (p *plugin) runOnce(ctx context.Context) error { if err := c.Close(); err != nil { return fmt.Errorf("failed to close connection to local gRPC server: %v", err) } - level.Info(p.logger).Log("msg", "the gRPC server is ready") + _ = level.Info(p.logger).Log("msg", "the gRPC server is ready") if err := p.registerWithKubelet(); err != nil { return fmt.Errorf("failed to register with kubelet: %v", err) } - level.Info(p.logger).Log("msg", "the registration is complete") + _ = level.Info(p.logger).Log("msg", "the registration is complete") <-ctx.Done() return nil }, func(error) { @@ -239,7 +239,7 @@ func (p *plugin) runOnce(ctx context.Context) error { } func (p *plugin) registerWithKubelet() error { - level.Info(p.logger).Log("msg", "registering plugin with kubelet") + _ = level.Info(p.logger).Log("msg", "registering plugin with kubelet") conn, err := grpc.Dial(filepath.Join(p.pluginDir, p.kubeSocketBase), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) { d := &net.Dialer{} diff --git a/deviceplugin/plugin_test.go b/deviceplugin/plugin_test.go index 8017e9f..9844667 
100644 --- a/deviceplugin/plugin_test.go +++ b/deviceplugin/plugin_test.go @@ -165,7 +165,9 @@ func TestRegisterWithKublet(t *testing.T) { }, func(error) {}) } - g.Run() + if err := g.Run(); err != nil && err.Error() != "test complete" { + t.Errorf("run group failed: %v", err) + } } func maybeLogError(f func() error, message string) { diff --git a/main.go b/main.go index 852f0e3..f268a23 100644 --- a/main.go +++ b/main.go @@ -17,7 +17,6 @@ package main import ( "context" - "encoding/base64" "fmt" "net" "net/http" @@ -26,7 +25,6 @@ import ( "path/filepath" "strings" "syscall" - "time" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -62,23 +60,44 @@ var ( // Main is the principal function for the binary, wrapped only by `main` for convenience. func Main() error { - ccResource := "google.com/cc" - ccDevicePaths := []string{"/dev/tpmrm0"} - ccMeasurmentPaths := []string{"/sys/kernel/security/tpm0/binary_bios_measurements"} + // We create a list of specs, one for each device type. + allDeviceSpecs := []*deviceplugin.CcDeviceSpec{ + { + // vTPM for standard Confidential VMs + Resource: "google.com/cc", + Type: deviceplugin.SoftwareAttestation, // Explicitly marked as software + DevicePaths: []string{"/dev/tpmrm0"}, + MeasurementPaths: []string{"/sys/kernel/security/tpm0/binary_bios_measurements"}, + DeviceLimit: 256, // Allow multiple pods to share the vTPM + }, + { + // Intel TDX + Resource: "intel.com/tdx", + Type: deviceplugin.HardwareAttestation, // Explicitly marked as hardware + DevicePaths: []string{"/dev/tdx-guest", "/dev/tdx_guest"}, // Some kernels use different names + // TDX does not have a separate measurement file, attestation is done via ioctl. 
+ MeasurementPaths: []string{}, + DeviceLimit: 1, // Only one container can use the TDX device at a time per node + }, + { + // AMD SEV-SNP + Resource: "amd.com/sev-snp", + Type: deviceplugin.HardwareAttestation, // Explicitly marked as hardware + DevicePaths: []string{"/dev/sev-guest"}, + // SEV-SNP also uses ioctl for attestation. + MeasurementPaths: []string{}, + DeviceLimit: 1, // Only one container can use the SEV-SNP device at a time per node + }, + } devicePluginPath := v1beta1.DevicePluginPath + socketPrefix := "cc-device-plugin" // by default, only track warning and error log logLevel := flag.String("log-level", logLevelWarn, fmt.Sprintf("Log level available values: %s", availableLogLevels)) listen := flag.String("listen", ":8080", "The listening port for health and metrics.") flag.Parse() - ccDeviceSpec := &deviceplugin.CcDeviceSpec{ - Resource: ccResource, - DevicePaths: ccDevicePaths, - MeasurementPaths: ccMeasurmentPaths, - } - logger := log.NewJSONLogger(log.NewSyncWriter(os.Stdout)) switch *logLevel { case logLevelAll: @@ -105,9 +124,21 @@ func Main() error { collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), ) + // Defer socket cleanup + defer func() { + _ = level.Info(logger).Log("msg", "Cleaning up potential socket files") + for _, spec := range allDeviceSpecs { + safeResourceName := strings.ReplaceAll(spec.Resource, "/", "-") + socketPath := filepath.Join(devicePluginPath, fmt.Sprintf("%s-%s.sock", socketPrefix, safeResourceName)) + if err := os.Remove(socketPath); err != nil && !os.IsNotExist(err) { + _ = level.Warn(logger).Log("msg", "Failed to remove socket file", "path", socketPath, "error", err) + } + } + }() + var g run.Group { - // Run the HTTP server. + // Run the HTTP server for metrics and health checks. 
mux := http.NewServeMux() mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) @@ -137,7 +168,7 @@ func Main() error { for { select { case <-term: - logger.Log("msg", "caught interrupt; gracefully cleaning up; see you next time!") + _ = level.Info(logger).Log("msg", "caught interrupt; gracefully cleaning up; see you next time!") return nil case <-cancel: return nil @@ -151,22 +182,41 @@ func Main() error { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - socketPrefix := "cc-device-plugin" - socket := filepath.Join(devicePluginPath, fmt.Sprintf("%s-%s-%d.sock", socketPrefix, base64.StdEncoding.EncodeToString([]byte(ccResource)), time.Now().Unix())) - tp, err := deviceplugin.NewCcDevicePlugin(ccDeviceSpec, devicePluginPath, socket, log.With(logger, "resource", ccDeviceSpec.Resource), prometheus.WrapRegistererWith(prometheus.Labels{"resource": ccDeviceSpec.Resource}, r)) - if err != nil { + pluginCreationErrors := false + // The run.Group `g` will manage all of them concurrently. 
+ for _, spec := range allDeviceSpecs { + // Use a local variable for the spec in the closure + ccDeviceSpec := spec + safeResourceName := strings.ReplaceAll(ccDeviceSpec.Resource, "/", "-") + socket := filepath.Join(devicePluginPath, fmt.Sprintf("%s-%s.sock", socketPrefix, safeResourceName)) + + // Create a new device plugin instance for the current device spec + p, err := deviceplugin.NewCcDevicePlugin(ccDeviceSpec, devicePluginPath, socket, log.With(logger, "resource", ccDeviceSpec.Resource), prometheus.WrapRegistererWith(prometheus.Labels{"resource": ccDeviceSpec.Resource}, r)) + if err != nil { + _ = level.Error(logger).Log("msg", "Failed to create new device plugin", "resource", ccDeviceSpec.Resource, "error", err) + pluginCreationErrors = true // Mark that at least one plugin failed + continue + } + + // Add the device plugin server to the run.Group + g.Add(func() error { + _ = level.Info(logger).Log("msg", "Starting the cc-device-plugin", "resource", ccDeviceSpec.Resource) + return p.Run(ctx) + }, func(error) { + // This will be called on shutdown, ensuring the context is cancelled for this plugin instance. + cancel() + }) + } + + if err := g.Run(); err != nil { return err } - // Start the cc device plugin server. 
- g.Add(func() error { - logger.Log("msg", fmt.Sprintf("Starting the cc-device-plugin for %q.", ccDeviceSpec.Resource)) - return tp.Run(ctx) - }, func(error) { - cancel() - }) + if pluginCreationErrors { + return fmt.Errorf("one or more device plugins failed to initialize") + } - return g.Run() + return nil } func main() { diff --git a/manifests/cc-device-plugin.yaml b/manifests/cc-device-plugin.yaml index 9e2bd1c..63dab45 100644 --- a/manifests/cc-device-plugin.yaml +++ b/manifests/cc-device-plugin.yaml @@ -34,7 +34,8 @@ spec: - operator: "Exists" effect: "NoSchedule" containers: - - image: us-central1-docker.pkg.dev/gce-confidential-compute/release/cc-device-plugin + - image: us-central1-docker.pkg.dev/gce-confidential-compute/release/cc-device-plugin:v1.1.0 + imagePullPolicy: Always name: cc-device-plugin resources: requests: diff --git a/manifests/example-deployment-manifest.yaml b/manifests/example-deployment-manifest.yaml index 7349de7..2a17558 100644 --- a/manifests/example-deployment-manifest.yaml +++ b/manifests/example-deployment-manifest.yaml @@ -32,7 +32,7 @@ spec: image: nginx ports: - containerPort: 8080 - name: http + name: http resources: limits: - google.com/cc: 1 + google.com/cc: "1" diff --git a/manifests/test-pods/pod-snp.yaml b/manifests/test-pods/pod-snp.yaml new file mode 100644 index 0000000..fafa61e --- /dev/null +++ b/manifests/test-pods/pod-snp.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Pod +metadata: + name: snp-test-pod +spec: + containers: + - name: test-container + image: alpine + command: ["/bin/sh", "-c"] + args: + - | + echo "Checking for SEV-SNP device..." 
+ ls -l /dev/sev-guest + echo "SNP container started successfully" + sleep 3600 + resources: + limits: + amd.com/sev-snp: "1" + requests: + amd.com/sev-snp: "1" + nodeSelector: + cloud.google.com/gke-confidential-nodes-instance-type: SEV_SNP \ No newline at end of file diff --git a/manifests/test-pods/pod-tdx.yaml b/manifests/test-pods/pod-tdx.yaml new file mode 100644 index 0000000..929c257 --- /dev/null +++ b/manifests/test-pods/pod-tdx.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Pod +metadata: + name: tdx-test-pod +spec: + containers: + - name: test-container + image: alpine + command: ["/bin/sh", "-c"] + args: + - | + echo "Checking for TDX device..." + ls -l /dev/tdx* + echo "TDX container started successfully" + sleep 3600 + resources: + limits: + intel.com/tdx: "1" + requests: + intel.com/tdx: "1" + nodeSelector: + cloud.google.com/gke-confidential-nodes-instance-type: TDX \ No newline at end of file