diff --git a/.github/workflows/lib-build.yaml b/.github/workflows/lib-build.yaml index dfc1d263b..90e0b6879 100644 --- a/.github/workflows/lib-build.yaml +++ b/.github/workflows/lib-build.yaml @@ -36,6 +36,7 @@ jobs: - sgx-sdk-demo - sgx-aesmd-demo - dsa-dpdk-dmadevtest + - stress-ng-gramine builder: [buildah, docker] steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 diff --git a/Makefile b/Makefile index fd64a2b6f..7805b7015 100644 --- a/Makefile +++ b/Makefile @@ -166,7 +166,7 @@ e2e-iaa: @$(GO) test -v ./test/e2e/... -ginkgo.v -ginkgo.show-node-events -ginkgo.focus "Device:iaa.*$(ADDITIONAL_FOCUS_REGEX)" $(GENERATED_SKIP_OPT) -delete-namespace-on-failure=false e2e-spr: - @$(GO) test -v ./test/e2e/... -ginkgo.v -ginkgo.show-node-events -ginkgo.focus "Device:(iaa|dsa)|Device:qat.*Mode:dpdk.*Resource:(cy|dc).*" -ginkgo.focus "Device:sgx.*|(SGX Admission)" -ginkgo.focus "Device:gpu.*Resource:i915" $(GENERATED_SKIP_OPT) -delete-namespace-on-failure=false + @$(GO) test -v ./test/e2e/... 
-ginkgo.v -ginkgo.show-node-events -ginkgo.focus "Device:(iaa|dsa)|Device:qat.*Mode:dpdk.*Resource:(cy|dc).*" -ginkgo.focus "Device:sgx.*|(SGX Admission)" -ginkgo.focus "Device:gpu.*Resource:i915" -ginkgo.skip "App:sgx-epc-cgroup" $(GENERATED_SKIP_OPT) -delete-namespace-on-failure=false pre-pull: ifeq ($(TAG),devel) diff --git a/demo/stress-ng-gramine/Dockerfile b/demo/stress-ng-gramine/Dockerfile new file mode 100644 index 000000000..f54097fd4 --- /dev/null +++ b/demo/stress-ng-gramine/Dockerfile @@ -0,0 +1,13 @@ +FROM gramineproject/gramine:1.7-jammy + +RUN apt-get update \ + && env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + stress-ng \ + make + +COPY * /stress-ng/ +WORKDIR /stress-ng + +RUN gramine-sgx-gen-private-key && \ + make SGX=1 +ENTRYPOINT ["/usr/bin/gramine-sgx"] diff --git a/demo/stress-ng-gramine/build/Makefile b/demo/stress-ng-gramine/build/Makefile new file mode 100644 index 000000000..b7ccb07c7 --- /dev/null +++ b/demo/stress-ng-gramine/build/Makefile @@ -0,0 +1,52 @@ +ARCH_LIBDIR ?= /lib/x86_64-linux-gnu + +ifeq ($(DEBUG),1) + GRAMINE_LOG_LEVEL = debug +else + GRAMINE_LOG_LEVEL = error +endif + +.PHONY: all +all: stress-ng.manifest stress-ng-edmm.manifest +ifeq ($(SGX),1) +all: stress-ng.manifest.sgx stress-ng.sig stress-ng-edmm.manifest.sgx stress-ng-edmm.sig +endif + +stress-ng.manifest: stress-ng.manifest.template + gramine-manifest \ + -Dlog_level=$(GRAMINE_LOG_LEVEL) \ + -Dedmm='false' \ + -Denclave_size=128M \ + -Dexecdir=$(shell dirname $(shell which stress-ng)) \ + -Darch_libdir=$(ARCH_LIBDIR) \ + $<
>$@ + +stress-ng-edmm.manifest.sgx: stress-ng-edmm.manifest + gramine-sgx-sign \ + --manifest stress-ng-edmm.manifest \ + --output $@ + +stress-ng-edmm.sig: stress-ng-edmm.manifest.sgx + +.PHONY: clean +clean: + $(RM) *.manifest *.manifest.sgx *.token *.sig OUTPUT + +.PHONY: distclean +distclean: clean diff --git a/demo/stress-ng-gramine/build/stress-ng.manifest.template b/demo/stress-ng-gramine/build/stress-ng.manifest.template new file mode 100644 index 000000000..38169aec9 --- /dev/null +++ b/demo/stress-ng-gramine/build/stress-ng.manifest.template @@ -0,0 +1,29 @@ +loader.entrypoint = "file:{{ gramine.libos }}" +libos.entrypoint = "{{ execdir }}/stress-ng" + +loader.log_level = "{{ log_level }}" +loader.insecure__use_cmdline_argv = true + +loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}:/usr{{ arch_libdir }}" +loader.env.PATH = "{{ execdir }}" + +fs.mounts = [ + { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" }, + { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" }, + { path = "/usr/lib", uri = "file:/usr/lib" }, + { path = "/stress-ng", uri = "file:/stress-ng" }, + { path = "{{ execdir }}", uri = "file:{{ execdir }}" }, +] + +sgx.debug = false +sgx.edmm_enable = {{ edmm }} +sgx.enclave_size = "{{ enclave_size }}" +sgx.max_threads = 6 + +sgx.trusted_files = [ + "file:{{ gramine.libos }}", + "file:{{ execdir }}/", + "file:{{ gramine.runtimedir() }}/", + "file:{{ arch_libdir }}/", + "file:/usr/{{ arch_libdir }}/", +] diff --git a/deployments/operator/crd/bases/deviceplugin.intel.com_sgxdeviceplugins.yaml b/deployments/operator/crd/bases/deviceplugin.intel.com_sgxdeviceplugins.yaml index f883bd211..41062a4ec 100644 --- a/deployments/operator/crd/bases/deviceplugin.intel.com_sgxdeviceplugins.yaml +++ b/deployments/operator/crd/bases/deviceplugin.intel.com_sgxdeviceplugins.yaml @@ -78,6 +78,12 @@ spec: description: NodeSelector provides a simple way to constrain device plugin pods to nodes with particular labels.
type: object + nriImage: + description: |- + NRIImage is a container image with SGX Node Resource Interface (NRI) plugin executable. Set + this value if SGX EPC cgroups limits enforcement is wanted. + TODO: is this a good name? + type: string provisionLimit: description: ProvisionLimit is a number of containers that can share the same SGX provision device. diff --git a/deployments/operator/samples/deviceplugin_v1_sgxdeviceplugin.yaml b/deployments/operator/samples/deviceplugin_v1_sgxdeviceplugin.yaml index 5ebba3557..ed12ffdb0 100644 --- a/deployments/operator/samples/deviceplugin_v1_sgxdeviceplugin.yaml +++ b/deployments/operator/samples/deviceplugin_v1_sgxdeviceplugin.yaml @@ -4,6 +4,7 @@ metadata: name: sgxdeviceplugin-sample spec: image: intel/intel-sgx-plugin:0.32.0 + nriImage: ghcr.io/containers/nri-plugins/nri-sgx-epc:v0.8.0 enclaveLimit: 110 provisionLimit: 110 logLevel: 4 diff --git a/deployments/sgx_epc_metrics/kustomization.yaml b/deployments/sgx_epc_metrics/kustomization.yaml new file mode 100644 index 000000000..b27597218 --- /dev/null +++ b/deployments/sgx_epc_metrics/kustomization.yaml @@ -0,0 +1,5 @@ +resources: + - "https://github.com/google/cadvisor/deploy/kubernetes/base?ref=master" + - service.yaml +patches: + - path: misc-metrics.yaml diff --git a/deployments/sgx_epc_metrics/misc-metrics.yaml b/deployments/sgx_epc_metrics/misc-metrics.yaml new file mode 100644 index 000000000..e96ea1b64 --- /dev/null +++ b/deployments/sgx_epc_metrics/misc-metrics.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: cadvisor + namespace: cadvisor +spec: + template: + spec: + nodeSelector: + intel.feature.node.kubernetes.io/sgx: 'true' + containers: + - name: cadvisor + image: docker.io/library/cadvisor:4af2b9b9 + command: [ + "/usr/bin/cadvisor", + "-enable_metrics", "misc", + "-logtostderr" + ] diff --git a/deployments/sgx_epc_metrics/service.yaml b/deployments/sgx_epc_metrics/service.yaml new file mode 100644 index 
000000000..746d3984d --- /dev/null +++ b/deployments/sgx_epc_metrics/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app: cadvisor + name: cadvisor + namespace: cadvisor +spec: + ports: + - name: http + port: 8080 + targetPort: http + selector: + app: cadvisor diff --git a/deployments/sgx_plugin/base/intel-sgx-plugin.yaml b/deployments/sgx_plugin/base/intel-sgx-plugin.yaml index 356650554..d0c710a17 100644 --- a/deployments/sgx_plugin/base/intel-sgx-plugin.yaml +++ b/deployments/sgx_plugin/base/intel-sgx-plugin.yaml @@ -18,6 +18,7 @@ spec: labels: app: intel-sgx-plugin spec: + priorityClassName: system-node-critical automountServiceAccountToken: false containers: - name: intel-sgx-plugin diff --git a/deployments/sgx_plugin/overlays/epc-cgroups/kustomization.yaml b/deployments/sgx_plugin/overlays/epc-cgroups/kustomization.yaml new file mode 100644 index 000000000..90ea7ec41 --- /dev/null +++ b/deployments/sgx_plugin/overlays/epc-cgroups/kustomization.yaml @@ -0,0 +1,7 @@ +resources: + - ../../base + +patches: +- path: nri_plugin_patch.yaml + target: + name: intel-sgx-plugin diff --git a/deployments/sgx_plugin/overlays/epc-cgroups/nri_plugin_patch.yaml b/deployments/sgx_plugin/overlays/epc-cgroups/nri_plugin_patch.yaml new file mode 100644 index 000000000..243e24ffc --- /dev/null +++ b/deployments/sgx_plugin/overlays/epc-cgroups/nri_plugin_patch.yaml @@ -0,0 +1,21 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: intel-sgx-plugin +spec: + template: + spec: + containers: + - name: nri-sgx-epc + image: ghcr.io/containers/nri-plugins/nri-sgx-epc:v0.8.0 + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + imagePullPolicy: IfNotPresent + volumeMounts: + - name: nrisockets + mountPath: /var/run/nri + volumes: + - name: nrisockets + hostPath: + path: /var/run/nri diff --git a/pkg/apis/deviceplugin/v1/sgxdeviceplugin_types.go b/pkg/apis/deviceplugin/v1/sgxdeviceplugin_types.go index
1b2bfb6be..3575f3443 100644 --- a/pkg/apis/deviceplugin/v1/sgxdeviceplugin_types.go +++ b/pkg/apis/deviceplugin/v1/sgxdeviceplugin_types.go @@ -35,6 +35,11 @@ type SgxDevicePluginSpec struct { // Recommendation is to leave this unset and prefer the SGX NodeFeatureRule instead. InitImage string `json:"initImage,omitempty"` + // NRIImage is a container image with SGX Node Resource Interface (NRI) plugin executable. Set + // this value if SGX EPC cgroups limits enforcement is wanted. + // TODO: is this a good name? + NRIImage string `json:"nriImage,omitempty"` + // Specialized nodes (e.g., with accelerators) can be Tainted to make sure unwanted pods are not scheduled on them. Tolerations can be set for the plugin pod to neutralize the Taint. Tolerations []v1.Toleration `json:"tolerations,omitempty"` diff --git a/pkg/controllers/sgx/controller.go b/pkg/controllers/sgx/controller.go index 05a7f1635..9ec49bcff 100644 --- a/pkg/controllers/sgx/controller.go +++ b/pkg/controllers/sgx/controller.go @@ -112,6 +112,27 @@ func setInitContainer(spec *v1.PodSpec, imageName string) { addVolumeIfMissing(spec, "nfd-features", "/etc/kubernetes/node-feature-discovery/source.d/", v1.HostPathDirectoryOrCreate) } +func setNRIContainer(spec *v1.PodSpec, imageName string) { + yes := true + no := false + spec.Containers = append(spec.Containers, v1.Container{ + Name: "nri-sgx-epc", + Image: imageName, + ImagePullPolicy: "IfNotPresent", + SecurityContext: &v1.SecurityContext{ + ReadOnlyRootFilesystem: &yes, + AllowPrivilegeEscalation: &no, + }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "nrisockets", + MountPath: "/var/run/nri", + }, + }, + }) + addVolumeIfMissing(spec, "nrisockets", "/var/run/nri", v1.HostPathDirectoryOrCreate) +} + func (c *controller) NewDaemonSet(rawObj client.Object) *apps.DaemonSet { devicePlugin := rawObj.(*devicepluginv1.SgxDevicePlugin) @@ -135,6 +156,10 @@ func (c *controller) NewDaemonSet(rawObj client.Object) *apps.DaemonSet { if devicePlugin.Spec.InitImage 
!= "" { setInitContainer(&daemonSet.Spec.Template.Spec, devicePlugin.Spec.InitImage) } + // add the optional NRI plugin container + if devicePlugin.Spec.NRIImage != "" { + setNRIContainer(&daemonSet.Spec.Template.Spec, devicePlugin.Spec.NRIImage) + } return daemonSet } @@ -171,6 +196,26 @@ func (c *controller) UpdateDaemonSet(rawObj client.Object, ds *apps.DaemonSet) ( updated = true } + // remove NRI plugin + if len(ds.Spec.Template.Spec.Containers) > 1 && dp.Spec.NRIImage == "" { + ds.Spec.Template.Spec.Containers = []v1.Container{ds.Spec.Template.Spec.Containers[0]} + ds.Spec.Template.Spec.Volumes = removeVolume(ds.Spec.Template.Spec.Volumes, "nrisockets") + updated = true + } + + // update NRI plugin image + if len(ds.Spec.Template.Spec.Containers) > 1 && ds.Spec.Template.Spec.Containers[1].Image != dp.Spec.NRIImage { + ds.Spec.Template.Spec.Containers[1].Image = dp.Spec.NRIImage + updated = true + } + + // add NRI plugin image + if len(ds.Spec.Template.Spec.Containers) == 1 && dp.Spec.NRIImage != "" { + setNRIContainer(&ds.Spec.Template.Spec, dp.Spec.NRIImage) + + updated = true + } + if len(dp.Spec.NodeSelector) > 0 { if !reflect.DeepEqual(ds.Spec.Template.Spec.NodeSelector, dp.Spec.NodeSelector) { ds.Spec.Template.Spec.NodeSelector = dp.Spec.NodeSelector diff --git a/pkg/controllers/sgx/controller_test.go b/pkg/controllers/sgx/controller_test.go index 47f653ed1..4412a33e3 100644 --- a/pkg/controllers/sgx/controller_test.go +++ b/pkg/controllers/sgx/controller_test.go @@ -75,6 +75,7 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet }, }, Spec: v1.PodSpec{ + PriorityClassName: "system-node-critical", AutomountServiceAccountToken: &no, Containers: []v1.Container{ { diff --git a/pkg/webhooks/sgx/sgx.go b/pkg/webhooks/sgx/sgx.go index 12fe286ce..05dc3cdf6 100644 --- a/pkg/webhooks/sgx/sgx.go +++ b/pkg/webhooks/sgx/sgx.go @@ -43,6 +43,7 @@ func (s *Mutator) SetupWebhookWithManager(mgr ctrl.Manager) error { } const ( + epcLimitKey = 
"epc-limit.nri.io/container" namespace = "sgx.intel.com" encl = namespace + "/enclave" epc = namespace + "/epc" @@ -156,6 +157,8 @@ func (s *Mutator) Default(ctx context.Context, obj runtime.Object) error { continue } + pod.Annotations[fmt.Sprintf("%s.%s", epcLimitKey, container.Name)] = fmt.Sprintf("%d", epcSize) + totalEpc += epcSize // Quote Generation Modes: diff --git a/scripts/set-version.sh b/scripts/set-version.sh index e9eed9d0f..172e68259 100755 --- a/scripts/set-version.sh +++ b/scripts/set-version.sh @@ -15,8 +15,8 @@ if [ $# != 1 ] || [ "$1" = "?" ] || [ "$1" = "--help" ]; then exit 1 fi -files=$(git grep -l '^TAG?*=\|intel/dsa-dpdk-dmadevtest:\|intel/accel-config-demo:\|intel/crypto-perf:\|intel/opae-nlb-demo:\|intel/openssl-qat-engine:\|intel/dlb-libdlb-demo:\|intel/sgx-sdk-demo:\|intel/intel-[^ ]*:\|version=\|appVersion:\|tag:' Makefile deployments demo/*accel-config*.yaml demo/*fpga*.yaml demo/*openssl*.yaml demo/dlb-libdlb*.yaml demo/dsa-dpdk-dmadev*.yaml pkg/controllers/*/*_test.go build/docker/*.Dockerfile test/e2e/*/*.go) +files=$(git grep -l '^TAG?*=\|intel/dsa-dpdk-dmadevtest:\|intel/accel-config-demo:\|intel/crypto-perf:\|intel/opae-nlb-demo:\|intel/openssl-qat-engine:\|intel/dlb-libdlb-demo:\|intel/stress-ng-gramine:\|intel/sgx-sdk-demo:\|intel/intel-[^ ]*:\|version=\|appVersion:\|tag:' Makefile deployments demo/*accel-config*.yaml demo/*fpga*.yaml demo/*openssl*.yaml demo/dlb-libdlb*.yaml demo/dsa-dpdk-dmadev*.yaml pkg/controllers/*/*_test.go build/docker/*.Dockerfile test/e2e/*/*.go) for file in $files; do - sed -i -e "s;\(^TAG?*=\|intel/dsa-dpdk-dmadevtest:\|intel/accel-config-demo:\|intel/crypto-perf:\|intel/opae-nlb-demo:\|intel/openssl-qat-engine:\|intel/dlb-libdlb-demo:\|intel/sgx-sdk-demo:\|intel/intel-[^ ]*:\|version=\|appVersion: [^ ]\|tag: [^ ]\)[^ \"]*;\1$1;g" "$file"; + sed -i -e 
"s;\(^TAG?*=\|intel/dsa-dpdk-dmadevtest:\|intel/accel-config-demo:\|intel/crypto-perf:\|intel/opae-nlb-demo:\|intel/openssl-qat-engine:\|intel/dlb-libdlb-demo:\|intel/stress-ng-gramine:\|intel/sgx-sdk-demo:\|intel/intel-[^ ]*:\|version=\|appVersion: [^ ]\|tag: [^ ]\)[^ \"]*;\1$1;g" "$file"; done diff --git a/test/e2e/sgx/sgx.go b/test/e2e/sgx/sgx.go index a41daf9b3..5db9e894c 100644 --- a/test/e2e/sgx/sgx.go +++ b/test/e2e/sgx/sgx.go @@ -16,6 +16,7 @@ package sgx import ( "context" + "fmt" "path/filepath" "time" @@ -28,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/kubernetes/test/e2e/framework" e2edebug "k8s.io/kubernetes/test/e2e/framework/debug" + e2ejob "k8s.io/kubernetes/test/e2e/framework/job" e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" admissionapi "k8s.io/pod-security-admission/api" @@ -38,6 +40,10 @@ const ( timeout = time.Second * 120 kustomizationWebhook = "deployments/sgx_admissionwebhook/overlays/default-with-certmanager/kustomization.yaml" kustomizationPlugin = "deployments/sgx_plugin/base/kustomization.yaml" + // TODO: move to epc-cgroups overlay once available. + // kustomizationPlugin = "deployments/sgx_plugin/overlays/epc-cgroups/kustomization.yaml". 
+ stressNGImage = "intel/stress-ng-gramine:devel" + stressNGEnclaveSize = 402653184 ) func init() { @@ -80,6 +86,9 @@ func describe() { }) ginkgo.Context("When SGX resources are available", func() { + var nodeWithEPC string + var epcCapacity int64 + ginkgo.BeforeEach(func(ctx context.Context) { ginkgo.By("checking if the resource is allocatable") if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "sgx.intel.com/epc", 150*time.Second, utils.WaitForPositiveResource); err != nil { @@ -91,6 +100,20 @@ func describe() { if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, "sgx.intel.com/provision", 30*time.Second, utils.WaitForPositiveResource); err != nil { framework.Failf("unable to wait for nodes to have positive allocatable provision resource: %v", err) } + + nodelist, err := f.ClientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + framework.Failf("failed to list Nodes: %v", err) + } + + // we have at least one node with sgx.intel.com/epc capacity + for _, item := range nodelist.Items { + if q, ok := item.Status.Allocatable["sgx.intel.com/epc"]; ok && q.Value() > 0 { + epcCapacity = q.Value() + nodeWithEPC = item.Name + break + } + } }) ginkgo.It("deploys a sgx-sdk-demo pod requesting SGX enclave resources [App:sgx-sdk-demo]", func(ctx context.Context) { @@ -120,6 +143,99 @@ func describe() { gomega.Expect(err).To(gomega.BeNil(), utils.GetPodLogs(ctx, f, pod.ObjectMeta.Name, "testcontainer")) }) + ginkgo.It("deploys simultaneous SGX EPC stressor jobs with equal EPC limits but no memory limits [App:sgx-epc-cgroup]", func(ctx context.Context) { + parallelism := int32(epcCapacity/stressNGEnclaveSize) + 1 + completions := int32(epcCapacity/stressNGEnclaveSize) + 1 + quantity := resource.NewQuantity(stressNGEnclaveSize/2, resource.BinarySI) + + testArgs := []string{ + "stress-ng", + "--verbose", + "--vm", + "1", + "--vm-bytes", + "20%", + "--page-in", + "-t", + "30", + } + job := e2ejob.NewTestJobOnNode("success", 
fmt.Sprintf("sgx-epc-stressjob-npods-%d", parallelism), v1.RestartPolicyNever, parallelism, completions, nil, 0, nodeWithEPC) + + job.Spec.Template.Spec.Containers[0].Image = stressNGImage + job.Spec.Template.Spec.Containers[0].Args = testArgs + job.Spec.Template.Spec.Containers[0].Resources = v1.ResourceRequirements{ + Requests: v1.ResourceList{"sgx.intel.com/epc": *quantity}, + Limits: v1.ResourceList{"sgx.intel.com/epc": *quantity}, + } + + job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job) + framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name) + + err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, nil, completions) + framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name) + }) + + ginkgo.It("deploys one SGX EPC stressor job with the EPC limit set to enclave size and the memory limit set very low [App:sgx-epc-cgroup]", func(ctx context.Context) { + quantity := resource.NewQuantity(stressNGEnclaveSize, resource.BinarySI) + + testArgs := []string{ + "stress-ng", + "--verbose", + "--vm", + "1", + "--vm-bytes", + "20%", + "--page-in", + "-t", + "30", + } + job := e2ejob.NewTestJobOnNode("success", "sgx-epc-stressjob-lowmemlimit", v1.RestartPolicyNever, 1, 1, nil, 0, nodeWithEPC) + + job.Spec.Template.Spec.Containers[0].Image = stressNGImage + job.Spec.Template.Spec.Containers[0].Args = testArgs + job.Spec.Template.Spec.Containers[0].Resources = v1.ResourceRequirements{ + Requests: v1.ResourceList{"sgx.intel.com/epc": *quantity}, + Limits: v1.ResourceList{"sgx.intel.com/epc": *quantity, + v1.ResourceMemory: resource.MustParse("42Mi")}, + } + + job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job) + framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name) + + err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, nil, 1) + framework.ExpectNoError(err, "failed to ensure job 
completion in namespace: %s", f.Namespace.Name) + }) + + ginkgo.It("deploys one SGX EPC stressor job with EDMM that ramps EPC allocations and memory limit set to kill once enough EPC pages are reclaimed [App:sgx-epc-cgroup]", func(ctx context.Context) { + quantity := resource.NewQuantity(epcCapacity/10, resource.BinarySI) + + testArgs := []string{ + "stress-ng-edmm", + "--verbose", + "--bigheap", + "1", + "--bigheap-growth", + "10m", + "--page-in", + "-t", + "300", + } + job := e2ejob.NewTestJobOnNode("success", "sgx-epc-stressjob-oom", v1.RestartPolicyNever, 1, 1, nil, 0, nodeWithEPC) + + job.Spec.Template.Spec.Containers[0].Image = stressNGImage + job.Spec.Template.Spec.Containers[0].Args = testArgs + job.Spec.Template.Spec.Containers[0].Resources = v1.ResourceRequirements{ + Requests: v1.ResourceList{"sgx.intel.com/epc": *quantity}, + Limits: v1.ResourceList{"sgx.intel.com/epc": *quantity, + v1.ResourceMemory: *quantity}, + } + + job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job) + framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name) + err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name) + framework.ExpectNoError(err, "failed to ensure job failure in namespace: %s", f.Namespace.Name) + }) + ginkgo.When("there is no app to run [App:noapp]", func() { ginkgo.It("does nothing", func() {}) }) diff --git a/test/e2e/sgxadmissionwebhook/sgxaadmissionwebhook.go b/test/e2e/sgxadmissionwebhook/sgxaadmissionwebhook.go index 987f8cacd..a37f1fef8 100644 --- a/test/e2e/sgxadmissionwebhook/sgxaadmissionwebhook.go +++ b/test/e2e/sgxadmissionwebhook/sgxaadmissionwebhook.go @@ -69,6 +69,7 @@ func describe() { ginkgo.By("checking the pod total EPC size annotation is correctly set") gomega.Expect(pod.Annotations["sgx.intel.com/epc"]).To(gomega.Equal("1Mi")) + gomega.Expect(pod.Annotations["epc-limit.nri.io/container.test"]).To(gomega.Equal("1048576")) }) ginkgo.It("mutates created pods when the container
contains the quote generation libraries", func(ctx context.Context) { ginkgo.By("submitting the pod") @@ -79,6 +80,7 @@ func describe() { ginkgo.By("checking the pod total EPC size annotation is correctly set") gomega.Expect(pod.Annotations["sgx.intel.com/epc"]).To(gomega.Equal("1Mi")) + gomega.Expect(pod.Annotations["epc-limit.nri.io/container.test"]).To(gomega.Equal("1048576")) }) ginkgo.It("mutates created pods when the container uses aesmd from a side-car container to generate quotes", func(ctx context.Context) { ginkgo.By("submitting the pod") @@ -93,6 +95,8 @@ func describe() { gomega.Expect(pod.Spec.Containers[0].Env[0].Value).To(gomega.Equal("1")) ginkgo.By("checking the pod total EPC size annotation is correctly set") gomega.Expect(pod.Annotations["sgx.intel.com/epc"]).To(gomega.Equal("2Mi")) + gomega.Expect(pod.Annotations["epc-limit.nri.io/container.test"]).To(gomega.Equal("1048576")) + gomega.Expect(pod.Annotations["epc-limit.nri.io/container.aesmd"]).To(gomega.Equal("1048576")) }) ginkgo.It("mutates created pods where one container uses host/daemonset aesmd to generate quotes", func(ctx context.Context) { ginkgo.By("submitting the pod") @@ -106,6 +110,7 @@ func describe() { gomega.Expect(pod.Spec.Containers[0].Env[0].Value).To(gomega.Equal("1")) ginkgo.By("checking the pod total EPC size annotation is correctly set") gomega.Expect(pod.Annotations["sgx.intel.com/epc"]).To(gomega.Equal("1Mi")) + gomega.Expect(pod.Annotations["epc-limit.nri.io/container.test"]).To(gomega.Equal("1048576")) }) ginkgo.It("mutates created pods where three containers use host/daemonset aesmd to generate quotes", func(ctx context.Context) { ginkgo.By("submitting the pod") @@ -125,6 +130,9 @@ func describe() { gomega.Expect(pod.Spec.Containers[2].Env[0].Value).To(gomega.Equal("1")) ginkgo.By("checking the pod total EPC size annotation is correctly set") gomega.Expect(pod.Annotations["sgx.intel.com/epc"]).To(gomega.Equal("3Mi")) + 
gomega.Expect(pod.Annotations["epc-limit.nri.io/container.test1"]).To(gomega.Equal("1048576")) + gomega.Expect(pod.Annotations["epc-limit.nri.io/container.test2"]).To(gomega.Equal("1048576")) + gomega.Expect(pod.Annotations["epc-limit.nri.io/container.test3"]).To(gomega.Equal("1048576")) }) ginkgo.It("checks that Volumes and VolumeMounts are created only once", func(ctx context.Context) { ginkgo.By("submitting the pod") diff --git a/test/envtest/sgxdeviceplugin_controller_test.go b/test/envtest/sgxdeviceplugin_controller_test.go index b346d40c8..3e90d3392 100644 --- a/test/envtest/sgxdeviceplugin_controller_test.go +++ b/test/envtest/sgxdeviceplugin_controller_test.go @@ -39,6 +39,7 @@ var _ = Describe("SgxDevicePlugin Controller", func() { spec := devicepluginv1.SgxDevicePluginSpec{ Image: "sgx-testimage", InitImage: "sgx-testinitimage", + NRIImage: "sgx-testnriimage", NodeSelector: map[string]string{"sgx-nodeselector": "true"}, } @@ -78,6 +79,7 @@ var _ = Describe("SgxDevicePlugin Controller", func() { By("updating SgxDevicePlugin successfully") updatedImage := "updated-sgx-testimage" updatedInitImage := "updated-sgx-testinitimage" + updatedNRIImage := "updated-sgx-testnriimage" updatedLogLevel := 2 updatedEnclaveLimit := 2 updatedProvisionLimit := 2 @@ -85,6 +87,7 @@ var _ = Describe("SgxDevicePlugin Controller", func() { fetched.Spec.Image = updatedImage fetched.Spec.InitImage = updatedInitImage + fetched.Spec.NRIImage = updatedNRIImage fetched.Spec.LogLevel = updatedLogLevel fetched.Spec.EnclaveLimit = updatedEnclaveLimit fetched.Spec.ProvisionLimit = updatedProvisionLimit @@ -114,13 +117,17 @@ var _ = Describe("SgxDevicePlugin Controller", func() { Expect(ds.Spec.Template.Spec.Containers[0].Args).Should(ConsistOf(expectArgs)) Expect(ds.Spec.Template.Spec.Containers[0].Image).Should(Equal(updatedImage)) Expect(ds.Spec.Template.Spec.InitContainers).To(HaveLen(1)) + Expect(ds.Spec.Template.Spec.Containers).To(HaveLen(2)) + 
Expect(ds.Spec.Template.Spec.Containers[1].Image).Should(Equal(updatedNRIImage)) Expect(ds.Spec.Template.Spec.InitContainers[0].Image).To(Equal(updatedInitImage)) Expect(ds.Spec.Template.Spec.NodeSelector).Should(Equal(updatedNodeSelector)) By("updating SgxDevicePlugin with different values successfully") updatedInitImage = "" + updatedNRIImage = "" updatedNodeSelector = map[string]string{} fetched.Spec.InitImage = updatedInitImage + fetched.Spec.NRIImage = updatedNRIImage fetched.Spec.NodeSelector = updatedNodeSelector Expect(k8sClient.Update(context.Background(), fetched)).Should(Succeed()) @@ -130,6 +137,7 @@ var _ = Describe("SgxDevicePlugin Controller", func() { err = k8sClient.Get(context.Background(), types.NamespacedName{Namespace: ns, Name: expectedDsName}, ds) Expect(err).To(BeNil()) Expect(ds.Spec.Template.Spec.InitContainers).To(HaveLen(0)) + Expect(ds.Spec.Template.Spec.Containers).To(HaveLen(1)) Expect(ds.Spec.Template.Spec.NodeSelector).Should(And(HaveLen(1), HaveKeyWithValue("kubernetes.io/arch", "amd64"))) By("updating SgxDevicePlugin with tolerations")