---
# KServe InferenceService for the meta-nim-llama-3.3-70b-instruct model format.
# NOTE(review): nesting was reconstructed from the KServe v1beta1 schema after
# the manifest had been flattened onto one line; confirm against the original.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    # Annotation values must be strings, so the boolean- and number-looking
    # values stay quoted.
    serving.kserve.io/enable-prometheus-scraping: "true"
    prometheus.kserve.io/port: "8000"
    prometheus.kserve.io/path: "/v1/metrics"
  # The resource name uses hyphens ("3-3"); the model format and runtime
  # below use a dot ("3.3").
  name: meta-nim-llama-3-3-70b-instruct
  namespace: default
spec:
  predictor:
    # Tolerate any NoSchedule taint whose key is nvidia.com/gpu, whatever its
    # value (operator: Exists).
    tolerations:
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule
    # Schedule only onto nodes labelled nvidia.com/gpu=true.
    nodeSelector:
      nvidia.com/gpu: "true"
    # NOTE(review): presumably a registry credential Secret in this namespace;
    # it is not defined in this manifest.
    imagePullSecrets:
      - name: edb-cred
    model:
      modelFormat:
        name: meta-nim-llama-3.3-70b-instruct
      # GPU request and limit are both "4".
      resources:
        limits:
          nvidia.com/gpu: "4"
        requests:
          nvidia.com/gpu: "4"
      # NOTE(review): expected to name a ServingRuntime/ClusterServingRuntime
      # defined elsewhere; verify it exists in the cluster.
      runtime: meta-nim-llama-3.3-70b-instruct