-
Notifications
You must be signed in to change notification settings - Fork 277
/
Copy pathguardrails-gaudi-values.yaml
86 lines (78 loc) · 1.95 KB
/
guardrails-gaudi-values.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Accelerate inference in the heaviest components to improve performance
# by overriding their subchart values.
# Image repository for the top-level ChatQnA chart.
image:
  repository: opea/chatqna

# Presumably selects the guardrails variant of the ChatQnA pipeline.
# NOTE(review): written as a top-level key (sibling of `image`); the original
# indentation was lost, so confirm placement against the chart's values.yaml.
CHATQNA_TYPE: "CHATQNA_GUARDRAILS"
# Guardrails-related config: enable the guardrails microservice and point it
# at the vllm-guardrails service of this release.
guardrails-usvc:
  enabled: true
  # Contains a Helm template expression for the release name.
  # NOTE(review): presumably rendered by the chart (e.g. via `tpl`) - confirm.
  SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-vllm-guardrails"
  # Same model ID that the vllm-guardrails section of this file serves.
  SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
  retryTimeoutSeconds: 720
# Gaudi-related config
# TEI runs on CPU by default; uncomment the block below to run it on Gaudi.
# tei:
#   accelDevice: "gaudi"
#   image:
#     repository: ghcr.io/huggingface/tei-gaudi
#     tag: 1.5.0
#   resources:
#     limits:
#       habana.ai/gaudi: 1
#   securityContext:
#     readOnlyRootFilesystem: false
# teirerank subchart overrides: run it on Gaudi with the tei-gaudi image.
teirerank:
  accelDevice: "gaudi"
  image:
    repository: ghcr.io/huggingface/tei-gaudi
    # Quoted so the version is always read as a string.
    tag: "1.5.0"
  resources:
    limits:
      # Limit of one habana.ai/gaudi resource for this workload.
      habana.ai/gaudi: 1
  securityContext:
    readOnlyRootFilesystem: false
  readinessProbe:
    timeoutSeconds: 1
  # Environment-style settings, kept as quoted strings.
  # NOTE(review): presumably exported as container env vars by the subchart.
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
  MAX_WARMUP_SEQUENCE_LENGTH: "512"
# vllm-guardrails subchart overrides: a vLLM instance on Gaudi serving the
# safety-guard model referenced by guardrails-usvc.
vllm-guardrails:
  enabled: true
  accelDevice: "gaudi"
  image:
    repository: opea/vllm-gaudi
  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
  resources:
    limits:
      # Limit of one habana.ai/gaudi resource for this workload.
      habana.ai/gaudi: 1
  # Extra command-line arguments, listed as consecutive flag/value items.
  extraCmdArgs:
    - "--tensor-parallel-size"
    - "1"
    - "--block-size"
    - "128"
    - "--max-num-seqs"
    - "256"
    - "--max-seq-len-to-capture"
    - "2048"
  startupProbe:
    # Tolerate up to 360 failed startup probes before the container is
    # considered failed.
    failureThreshold: 360
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
# TGI is switched off; the vllm section of this file is enabled instead.
tgi:
  enabled: false
# vllm subchart overrides: run vLLM on Gaudi with the vllm-gaudi image.
vllm:
  enabled: true
  shmSize: 1Gi
  accelDevice: "gaudi"
  image:
    repository: opea/vllm-gaudi
  resources:
    limits:
      # Limit of one habana.ai/gaudi resource for this workload.
      habana.ai/gaudi: 1
  startupProbe:
    # Tolerate up to 360 failed startup probes before the container is
    # considered failed.
    failureThreshold: 360

  # Environment-style settings, kept as quoted strings so "true" is not
  # parsed as a YAML boolean.
  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
  OMPI_MCA_btl_vader_single_copy_mechanism: "none"

  # Extra command-line arguments, listed as consecutive flag/value items.
  extraCmdArgs:
    - "--tensor-parallel-size"
    - "1"
    - "--block-size"
    - "128"
    - "--max-num-seqs"
    - "256"
    - "--max-seq-len-to-capture"
    - "2048"