Update TGI image versions #1749

Open · wants to merge 7 commits into `main`

This PR updates the TGI image tags used across the examples: ROCm services move to ghcr.io/huggingface/text-generation-inference:2.4.1-rocm (from 2.3.1-rocm, and from 3.0.0-rocm in the AgentQnA compose file), Intel CPU (Xeon) services move from 2.4.0-intel-cpu to 2.4.1-intel-cpu, and Gaudi services move from ghcr.io/huggingface/tgi-gaudi:2.0.6 to 2.3.1. The affected files are listed below as unified diffs.
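One quick way to check that every new tag in this PR actually exists on the registry is to pull them directly (a sketch; assumes a local Docker daemon with access to ghcr.io):

```bash
# Pull each image tag introduced by this PR; an unpublished or typo'd tag fails fast here.
docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
```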
AgentQnA/docker_compose/amd/gpu/rocm/README.md (2 additions, 2 deletions)

@@ -64,7 +64,7 @@ We remind you that when using a specific version of the code, you need to use th
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)

 ```bash
-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 ```

 - #### Build Docker Images
@@ -110,7 +110,7 @@ We remind you that when using a specific version of the code, you need to use th

 ##### TGI-based application:

-- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+- ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 - opea/agent:latest
 - redis/redis-stack:7.2.0-v9
 - ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml (1 addition, 1 deletion)

@@ -2,7 +2,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:3.0.0-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: tgi-service
     ports:
       - "${TGI_SERVICE_PORT-8085}:80"
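Note that this compose file moves from `3.0.0-rocm` down to `2.4.1-rocm`, bringing it in line with the README above. If you want to trial a different tag locally without editing the file, a Compose override is one option (a sketch, not part of this PR; the override filename is hypothetical, while the service name matches the compose file above):

```bash
# Sketch: pin a different TGI tag via an override file instead of editing compose.yaml.
# A second -f file overrides matching keys from the first.
cat > tgi-override.yaml <<'EOF'
services:
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
EOF
docker compose -f compose.yaml -f tgi-override.yaml up -d tgi-service
```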
AudioQnA/docker_compose/amd/gpu/rocm/README.md (2 additions, 2 deletions)

@@ -68,7 +68,7 @@ We remind you that when using a specific version of the code, you need to use th
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)

 ```bash
-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 ```

 - #### Build Docker Images
@@ -94,7 +94,7 @@ We remind you that when using a specific version of the code, you need to use th

 ##### TGI-based application:

-- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+- ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 - opea/whisper:latest
 - opea/speecht5:latest
 - opea/audioqna:latest
AudioQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml (1 addition, 1 deletion)

@@ -25,7 +25,7 @@ services:
       https_proxy: ${https_proxy}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - ${LLM_SERVER_PORT:-3006}:80
AudioQnA/tests/test_compose_tgi_on_gaudi.sh (1 addition, 1 deletion)

@@ -35,7 +35,7 @@ function build_docker_images() {
     service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
     docker images && sleep 1s
 }
AudioQnA/tests/test_compose_tgi_on_xeon.sh (1 addition, 1 deletion)

@@ -35,7 +35,7 @@ function build_docker_images() {
     service_list="audioqna audioqna-ui whisper speecht5"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker images && sleep 1s
 }
AvatarChatbot/docker_compose/amd/gpu/rocm/compose.yaml (1 addition, 1 deletion)

@@ -42,7 +42,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: tgi-service
     ports:
       - "${TGI_SERVICE_PORT:-3006}:80"
AvatarChatbot/docker_compose/intel/cpu/xeon/README.md (1 addition, 1 deletion)

@@ -19,7 +19,7 @@ docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build

 ### 3. Build LLM Image

-Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu (https://github.com/huggingface/text-generation-inference)
+Intel Xeon optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu (https://github.com/huggingface/text-generation-inference)

 ### 4. Build TTS Image
AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml (1 addition, 1 deletion)

@@ -26,7 +26,7 @@ services:
       https_proxy: ${https_proxy}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md (1 addition, 1 deletion)

@@ -19,7 +19,7 @@ docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy -

 ### 3. Build LLM Image

-Intel Gaudi optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.0.6 (https://github.com/huggingface/tgi-gaudi)
+Intel Gaudi optimized image hosted in huggingface repo will be used for TGI service: ghcr.io/huggingface/tgi-gaudi:2.3.1 (https://github.com/huggingface/tgi-gaudi)

 ### 4. Build TTS Image
[file path not shown in the source page] (1 addition, 1 deletion)

@@ -38,7 +38,7 @@ services:
       - SYS_NICE
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
AvatarChatbot/tests/test_compose_on_gaudi.sh (1 addition, 1 deletion)

@@ -42,7 +42,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper-gaudi speecht5-gaudi wav2lip-gaudi animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1

     docker images && sleep 1s
 }
AvatarChatbot/tests/test_compose_on_rocm.sh (1 addition, 1 deletion)

@@ -30,7 +30,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper asr llm-textgen speecht5 tts wav2lip animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm

     docker images && sleep 3s
 }
AvatarChatbot/tests/test_compose_on_xeon.sh (1 addition, 1 deletion)

@@ -42,7 +42,7 @@ function build_docker_images() {
     service_list="avatarchatbot whisper speecht5 wav2lip animation"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu

     docker images && sleep 1s
 }
ChatQnA/docker_compose/amd/gpu/rocm/README.md (3 additions, 3 deletions)

@@ -90,7 +90,7 @@
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)

 ```bash
-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 ```

 - #### Build Docker Images
@@ -136,7 +136,7 @@
 - opea/dataprep:latest
 - ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 - opea/retriever:latest
-- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+- ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 - opea/chatqna:latest
 - opea/chatqna-ui:latest
 - opea/nginx:latest
@@ -147,7 +147,7 @@
 - opea/dataprep:latest
 - ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 - opea/retriever:latest
-- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+- ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 - opea/llm-faqgen:latest
 - opea/chatqna:latest
 - opea/chatqna-ui:latest
ChatQnA/docker_compose/amd/gpu/rocm/compose.yaml (1 addition, 1 deletion)

@@ -85,7 +85,7 @@ services:
     command: --model-id ${CHATQNA_RERANK_MODEL_ID} --auto-truncate

   chatqna-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: chatqna-tgi-service
     ports:
       - "${CHATQNA_TGI_SERVICE_PORT}:80"
ChatQnA/docker_compose/amd/gpu/rocm/compose_faqgen.yaml (1 addition, 1 deletion)

@@ -85,7 +85,7 @@ services:
     command: --model-id ${CHATQNA_RERANK_MODEL_ID} --auto-truncate

   chatqna-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: chatqna-tgi-service
     ports:
       - "${CHATQNA_TGI_SERVICE_PORT}:80"
[file path not shown in the source page] (1 addition, 1 deletion)

@@ -81,7 +81,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-server
     ports:
       - ${LLM_ENDPOINT_PORT:-9009}:80
ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml (1 addition, 1 deletion)

@@ -81,7 +81,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
ChatQnA/kubernetes/gmc/README.md (1 addition, 1 deletion)

@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
 - tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
 - retriever: opea/retriever:latest
 - tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
-- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
 - chaqna-xeon-backend-server: opea/chatqna:latest

 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
ChatQnA/tests/test_compose_faqgen_on_rocm.sh (1 addition, 1 deletion)

@@ -69,7 +69,7 @@ function build_docker_images() {
     service_list="chatqna chatqna-ui dataprep retriever llm-faqgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}"/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6

     docker images && sleep 1s
ChatQnA/tests/test_compose_faqgen_tgi_on_gaudi.sh (1 addition, 1 deletion)

@@ -28,7 +28,7 @@ function build_docker_images() {
     service_list="chatqna chatqna-ui dataprep retriever llm-faqgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
     docker pull ghcr.io/huggingface/tei-gaudi:1.5.0
     docker images && sleep 1s
ChatQnA/tests/test_compose_faqgen_tgi_on_xeon.sh (1 addition, 1 deletion)

@@ -32,7 +32,7 @@ function build_docker_images() {
     service_list="chatqna chatqna-ui dataprep retriever llm-faqgen nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6
     docker images && sleep 1s
 }
ChatQnA/tests/test_compose_on_rocm.sh (1 addition, 1 deletion)

@@ -67,7 +67,7 @@ function build_docker_images() {
     service_list="chatqna chatqna-ui dataprep retriever nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}"/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6

     docker images && sleep 1s
ChatQnA/tests/test_compose_tgi_on_xeon.sh (1 addition, 1 deletion)

@@ -27,7 +27,7 @@ function build_docker_images() {
     service_list="chatqna chatqna-ui dataprep retriever nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.6

     docker images && sleep 1s
CodeGen/docker_compose/amd/gpu/rocm/README.md (2 additions, 2 deletions)

@@ -71,7 +71,7 @@
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)

 ```bash
-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 ```

 - #### Build Docker Images
@@ -97,7 +97,7 @@

 ##### TGI-based application:

-- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+- ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 - opea/llm-textgen:latest
 - opea/codegen:latest
 - opea/codegen-ui:latest
CodeGen/docker_compose/intel/cpu/xeon/README.md (2 additions, 2 deletions)

@@ -127,7 +127,7 @@ docker compose --profile codegen-xeon-tgi up -d
 Then run the command `docker images`, you will have the following Docker images:

 - `ghcr.io/huggingface/text-embeddings-inference:cpu-1.5`
-- `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`
+- `ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu`
 - `opea/codegen-gradio-ui`
 - `opea/codegen`
 - `opea/dataprep`
@@ -145,7 +145,7 @@ docker compose --profile codegen-xeon-vllm up -d
 Then run the command `docker images`, you will have the following Docker images:

 - `ghcr.io/huggingface/text-embeddings-inference:cpu-1.5`
-- `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`
+- `ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu`
 - `opea/codegen-gradio-ui`
 - `opea/codegen`
 - `opea/dataprep`
CodeGen/docker_compose/intel/cpu/xeon/compose.yaml (1 addition, 1 deletion)

@@ -4,7 +4,7 @@
 services:

   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: tgi-server
     profiles:
       - codegen-xeon-tgi
CodeGen/docker_compose/intel/hpu/gaudi/README.md (2 additions, 2 deletions)

@@ -120,7 +120,7 @@ docker compose --profile codegen-gaudi-tgi up -d
 Then run the command `docker images`, you will have the following Docker images:

 - `ghcr.io/huggingface/text-embeddings-inference:cpu-1.5`
-- `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`
+- `ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu`
 - `opea/codegen-gradio-ui`
 - `opea/codegen`
 - `opea/dataprep`
@@ -138,7 +138,7 @@ docker compose --profile codegen-gaudi-vllm up -d
 Then run the command `docker images`, you will have the following Docker images:

 - `ghcr.io/huggingface/text-embeddings-inference:cpu-1.5`
-- `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`
+- `ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu`
 - `opea/codegen-gradio-ui`
 - `opea/codegen`
 - `opea/dataprep`
CodeGen/tests/test_compose_on_rocm.sh (1 addition, 1 deletion)

@@ -35,7 +35,7 @@ function build_docker_images() {
     service_list="codegen codegen-ui llm-textgen"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     docker images && sleep 1s
 }
CodeGen/tests/test_compose_on_xeon.sh (1 addition, 1 deletion)

@@ -52,7 +52,7 @@ function build_docker_images() {

     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     docker images && sleep 1s
 }
CodeTrans/docker_compose/amd/gpu/rocm/README.md (2 additions, 2 deletions)

@@ -71,7 +71,7 @@
 - #### Optional. Pull TGI Docker Image (Do this if you want to use TGI)

 ```bash
-docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 ```

 - #### Build Docker Images
@@ -98,7 +98,7 @@

 ##### TGI-based application:

-- ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+- ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
 - opea/llm-textgen:latest
 - opea/codetrans:latest
 - opea/codetrans-ui:latest
CodeTrans/docker_compose/intel/cpu/xeon/README.md (2 additions, 2 deletions)

@@ -76,7 +76,7 @@ For users in China who are unable to download models directly from Huggingface,
 # Start vLLM LLM Service
 docker run -p 8008:80 -v ./data:/root/.cache/huggingface/hub --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80
 # Start TGI LLM Service
-docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
+docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu --model-id $model_name
 ```

 2. Offline
@@ -93,7 +93,7 @@ For users in China who are unable to download models directly from Huggingface,
 # Start vLLM LLM Service
 docker run -p 8008:80 -v $model_path:/root/.cache/huggingface/hub --name vllm-service --shm-size 128g opea/vllm:latest --model /root/.cache/huggingface/hub --host 0.0.0.0 --port 80
 # Start TGI LLM Service
-docker run -p 8008:80 -v $model_path:/data --name tgi-service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
+docker run -p 8008:80 -v $model_path:/data --name tgi-service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu --model-id /data
 ```

 ### Setup Environment Variables
CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi.yaml (1 addition, 1 deletion)

@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu
     container_name: codetrans-xeon-tgi-service
     ports:
       - "8008:80"
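For reviewers who want to smoke-test one of the bumped services, here is one possible check against the ChatQnA Xeon compose file above (a sketch; it assumes the environment variables documented in that example's README are already exported, and uses the 9009:80 port mapping shown in the diff):

```bash
# Start only the TGI service from the updated compose file and probe it.
cd ChatQnA/docker_compose/intel/cpu/xeon
docker compose -f compose_tgi.yaml up -d tgi-service

# TGI answers /health and /generate once the model has finished loading.
curl http://localhost:9009/health
curl http://localhost:9009/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs":"Hello","parameters":{"max_new_tokens":17}}'
```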