---
# KServe ClusterServingRuntime that serves the
# nim/meta/llama-3.3-70b-instruct container image, tag 1.8.4.
apiVersion: serving.kserve.io/v1alpha1
kind: ClusterServingRuntime
metadata:
  name: meta-nim-llama-3.3-70b-instruct
  # NOTE(review): ClusterServingRuntime is presumably a cluster-scoped kind,
  # in which case this namespace has no effect - confirm before removing it.
  namespace: default
spec:
  containers:
    - name: kserve-container
      image: upmdev.azurecr.io/nim/meta/llama-3.3-70b-instruct:1.8.4
      env:
        # NOTE(review): the cache path points at /tmp, which is presumably
        # container-local storage that does not survive restarts - confirm
        # that re-populating the cache on every start is intended.
        - name: NIM_CACHE_PATH
          value: /tmp
        # The key is read from a Secret instead of being inlined here.
        - name: NGC_API_KEY
          valueFrom:
            secretKeyRef:
              name: nvidia-nim-secrets
              key: NGC_API_KEY
      ports:
        - containerPort: 8000
          protocol: TCP
      # Requests are set equal to limits for both CPU and memory.
      # The CPU quantities are quoted so they stay strings.
      resources:
        limits:
          cpu: "24"
          memory: 600Gi
        requests:
          cpu: "24"
          memory: 600Gi
      volumeMounts:
        # Backed by the memory-medium emptyDir declared under spec.volumes.
        - mountPath: /dev/shm
          name: dshm
  # Pull secret used for the image registry referenced above.
  imagePullSecrets:
    - name: edb-cred
  protocolVersions:
    - v2
    - grpc-v2
  supportedModelFormats:
    - name: meta-nim-llama-3.3-70b-instruct
      # Quoted so the version is kept as a string, matching the image tag.
      version: "1.8.4"
      autoSelect: true
      priority: 1
  volumes:
    # Memory-medium emptyDir, capped at 60Gi, mounted at /dev/shm.
    - name: dshm
      emptyDir:
        medium: Memory
        sizeLimit: 60Gi