diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
index a0ef81d172..ea3c45b919 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -41,7 +41,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
index d8ca1d7f8c..3e20dbc4af 100644
--- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
+++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml
@@ -26,7 +26,7 @@ services:
       https_proxy: ${https_proxy}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
index bd76774835..6856d2b878 100644
--- a/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
+++ b/AudioQnA/kubernetes/intel/cpu/xeon/manifest/audioqna.yaml
@@ -247,7 +247,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: audio-qna-config
-        image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+        image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
diff --git a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
index aa6f49bf87..2496b11e87 100644
--- a/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -42,7 +42,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
    ports:
       - "3006:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
index 4598c07ec0..3f2766ec57 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -195,7 +195,7 @@ For users in China who are unable to download models directly from Huggingface,
    export HF_TOKEN=${your_hf_token}
    export HF_ENDPOINT="https://hf-mirror.com"
    model_name="Intel/neural-chat-7b-v3-3"
-   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
+   docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
    ```

 2. Offline
@@ -209,7 +209,7 @@ For users in China who are unable to download models directly from Huggingface,
    ```bash
    export HF_TOKEN=${your_hf_token}
    export model_path="/path/to/model"
-   docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
+   docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
    ```

 ### Setup Environment Variables
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 14794e8d4f..0c290b8683 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -73,7 +73,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml
index 8d37bb83af..ad7df8fa79 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml
@@ -72,7 +72,7 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "6042:80"
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
index e497985f8a..938a6690d3 100644
--- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml
@@ -57,7 +57,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "9009:80"
diff --git a/ChatQnA/kubernetes/intel/README_gmc.md b/ChatQnA/kubernetes/intel/README_gmc.md
index dab86381fe..860bae7205 100644
--- a/ChatQnA/kubernetes/intel/README_gmc.md
+++ b/ChatQnA/kubernetes/intel/README_gmc.md
@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
 - tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 - retriever: opea/retriever-redis:latest
 - tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
 - chaqna-xeon-backend-server: opea/chatqna:latest

 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
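One quick way to sanity-check the retagged CPU image is to launch it the same way the ChatQnA README above does and send a single generation request. The sketch below is illustrative only: the `8008:80` port mapping and the `Intel/neural-chat-7b-v3-3` model are the ones already used in that README, not new assumptions.

```bash
# Start TGI from the pinned release tag (same flags as the ChatQnA README above).
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker run -d -p 8008:80 -v ./data:/data --name tgi-service --shm-size 1g \
  ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu \
  --model-id Intel/neural-chat-7b-v3-3

# After the model finishes loading, a short request to TGI's /generate endpoint
# should return JSON containing "generated_text".
curl http://localhost:8008/generate -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}'
```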
diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
index 3d5f367d0f..70aa65bd63 100644
--- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
+++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml
@@ -1100,7 +1100,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
@@ -1180,7 +1180,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
index 69e33b873d..744f095915 100644
--- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
+++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
@@ -922,7 +922,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml
index 90db7043c7..b182851179 100644
--- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml
+++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml
@@ -925,7 +925,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ChatQnA/tests/test_compose_on_xeon.sh b/ChatQnA/tests/test_compose_on_xeon.sh
index f906dfabbf..3535159b3f 100644
--- a/ChatQnA/tests/test_compose_on_xeon.sh
+++ b/ChatQnA/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

     docker images && sleep 1s
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
index ab1e4150ce..64b74db71f 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "8028:80"
diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
index 4e6d8f91c8..d0070dc969 100644
--- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
+++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
@@ -404,7 +404,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml
index 5d77fb8cc8..a155af13a0 100644
--- a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml
+++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen_react_ui.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh
index 0821cd3cb6..b184c00f31 100644
--- a/CodeGen/tests/test_compose_on_xeon.sh
+++ b/CodeGen/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="codegen codegen-ui llm-tgi"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
index 122028b56e..16c05cf363 100644
--- a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: codetrans-tgi-service
     ports:
       - "8008:80"
diff --git a/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml b/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml
index 4429083432..a778a8529e 100644
--- a/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml
+++ b/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml
@@ -404,7 +404,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/CodeTrans/tests/test_compose_on_xeon.sh b/CodeTrans/tests/test_compose_on_xeon.sh
index d1f55c9a3d..63fe74f058 100644
--- a/CodeTrans/tests/test_compose_on_xeon.sh
+++ b/CodeTrans/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="codetrans codetrans-ui llm-tgi nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
index 620ee36575..35e673563b 100644
--- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/DocSum/kubernetes/intel/README_gmc.md b/DocSum/kubernetes/intel/README_gmc.md
index b332292110..6046ca4dcb 100644
--- a/DocSum/kubernetes/intel/README_gmc.md
+++ b/DocSum/kubernetes/intel/README_gmc.md
@@ -8,7 +8,7 @@ Install GMC in your Kubernetes cluster, if you have not already done so, by foll
 The DocSum application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not it starts them and then proceeds to connect them. When the DocSum RAG pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular embedding, retriever, rerank, and llm.

 The DocSum pipeline uses prebuilt images. The Xeon version uses the prebuilt image `llm-docsum-tgi:latest` which internally leverages the
-the image `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
+the image `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`. The service is called tgi-svc. Meanwhile, the Gaudi version launches the
 service tgi-gaudi-svc, which uses the image `ghcr.io/huggingface/tgi-gaudi:2.0.5`. Both TGI model services serve the model specified in the LLM_MODEL_ID variable that is exported by you. In the below example we use `Intel/neural-chat-7b-v3-3`.

 [NOTE]
diff --git a/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml b/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
index 1416bdbcbc..9199888a10 100644
--- a/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
+++ b/DocSum/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
@@ -404,7 +404,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml b/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml
index 61e8799b0e..560e34a215 100644
--- a/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml
+++ b/DocSum/kubernetes/intel/cpu/xeon/manifest/ui/react-docsum.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
index 8c5c894aed..59df3093e9 100644
--- a/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/FaqGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-xeon-server
     ports:
       - "8008:80"
diff --git a/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml b/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml
index 845ba50412..53b2d541f3 100644
--- a/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml
+++ b/FaqGen/kubernetes/intel/cpu/xeon/manifest/faqgen_react_ui.yaml
@@ -126,7 +126,7 @@ spec:
             - name: no_proxy
               value:
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
index 43de640ad3..c3a65e92b6 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml
@@ -993,7 +993,7 @@ spec:

                 name: chatqna-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
index 6c52c5d921..5eb3cd6eb4 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/codegen.yaml
@@ -229,7 +229,7 @@ spec:

                 name: codegen-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
index 0fda41f5e1..44d16ee9a8 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/docsum.yaml
@@ -229,7 +229,7 @@ spec:

                 name: docsum-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
index 749d984082..2c0b3bffc2 100644
--- a/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
+++ b/ProductivitySuite/kubernetes/intel/cpu/xeon/manifest/faqgen.yaml
@@ -138,7 +138,7 @@ spec:
             - configMapRef:
                 name: faqgen-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 0b7995118d..53be5846e3 100644
--- a/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/SearchQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -73,7 +73,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "3006:80"
diff --git a/SearchQnA/tests/test_compose_on_xeon.sh b/SearchQnA/tests/test_compose_on_xeon.sh
index 5436cc1c50..6c73833acc 100644
--- a/SearchQnA/tests/test_compose_on_xeon.sh
+++ b/SearchQnA/tests/test_compose_on_xeon.sh
@@ -23,7 +23,7 @@ function build_docker_images() {
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml
index 108a5086d2..39ea18d460 100644
--- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-service
     ports:
       - "8008:80"
diff --git a/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml b/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
index e30fee338e..9cc8c2798f 100644
--- a/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
+++ b/Translation/kubernetes/intel/cpu/xeon/manifest/translation.yaml
@@ -361,7 +361,7 @@ spec:
             runAsUser: 1000
             seccompProfile:
               type: RuntimeDefault
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/Translation/tests/test_compose_on_xeon.sh b/Translation/tests/test_compose_on_xeon.sh
index 2d0c5306d5..b7fc6acb39 100644
--- a/Translation/tests/test_compose_on_xeon.sh
+++ b/Translation/tests/test_compose_on_xeon.sh
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="translation translation-ui llm-tgi nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/README.md b/VisualQnA/docker_compose/intel/cpu/xeon/README.md
index 8f0d5b6b34..eb1ef817b0 100644
--- a/VisualQnA/docker_compose/intel/cpu/xeon/README.md
+++ b/VisualQnA/docker_compose/intel/cpu/xeon/README.md
@@ -67,12 +67,12 @@ docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$htt
 ### 4. Pull TGI Xeon Image

 ```bash
-docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
 ```

 Then run the command `docker images`, you will have the following 5 Docker Images:

-1. `ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu`
+1. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu`
 2. `opea/lvm-tgi:latest`
 3. `opea/visualqna:latest`
 4. `opea/visualqna-ui:latest`
diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 553b13908b..33b5e189b1 100644
--- a/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -3,7 +3,7 @@

 services:
   llava-tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-llava-xeon-server
     ports:
       - "8399:80"
diff --git a/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml b/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
index 4d3ee3bf24..1f1b2d316a 100644
--- a/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
+++ b/VisualQnA/kubernetes/intel/cpu/xeon/manifest/visualqna.yaml
@@ -216,7 +216,7 @@ spec:

                 name: visualqna-tgi-config
           securityContext: {}
-          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
+          image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data
diff --git a/VisualQnA/tests/test_compose_on_xeon.sh b/VisualQnA/tests/test_compose_on_xeon.sh
index 8829896387..4d9c194833 100644
--- a/VisualQnA/tests/test_compose_on_xeon.sh
+++ b/VisualQnA/tests/test_compose_on_xeon.sh
@@ -21,7 +21,7 @@ function build_docker_images() {
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
     docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
+    docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu

     docker images && sleep 1s
 }
diff --git a/VisualQnA/ui/svelte/package.json b/VisualQnA/ui/svelte/package.json
index 6444d18c14..e2a39a2c4d 100644
--- a/VisualQnA/ui/svelte/package.json
+++ b/VisualQnA/ui/svelte/package.json
@@ -15,8 +15,7 @@
     "@fortawesome/free-solid-svg-icons": "6.2.0",
     "@playwright/test": "^1.33.0",
     "@sveltejs/adapter-auto": "1.0.0-next.75",
-    "@sveltejs/adapter-static": "^3.0.0",
-    "@sveltejs/kit": "^2.0.0",
+    "@sveltejs/kit": "^1.30.4",
     "@tailwindcss/typography": "0.5.7",
     "@types/debug": "4.1.7",
     "@types/node": "^20.12.13",
@@ -29,20 +28,21 @@
     "eslint": "^8.16.0",
     "eslint-config-prettier": "^8.3.0",
     "eslint-plugin-neverthrow": "1.1.4",
+    "eslint-plugin-svelte3": "^4.0.0",
     "postcss": "^8.4.31",
     "postcss-load-config": "^4.0.1",
     "postcss-preset-env": "^8.3.2",
     "prettier": "^2.8.8",
     "prettier-plugin-svelte": "^2.7.0",
     "prettier-plugin-tailwindcss": "^0.3.0",
-    "svelte": "^4.0.0",
-    "svelte-check": "^3.0.0",
+    "svelte": "^3.59.1",
+    "svelte-check": "^2.7.1",
     "svelte-fa": "3.0.3",
-    "svelte-preprocess": "^6.0.2",
+    "svelte-preprocess": "^4.10.7",
     "tailwindcss": "^3.1.5",
     "tslib": "^2.3.1",
-    "typescript": "^5.0.0",
-    "vite": "^5.0.0"
+    "typescript": "^4.7.4",
+    "vite": "^4.5.2"
   },
   "type": "module",
   "dependencies": {
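The `package.json` hunk above moves the VisualQnA Svelte UI back onto the Svelte 3 / SvelteKit 1 toolchain (svelte 3.59, svelte-check 2.x, vite 4, typescript 4.7), so the UI's lockfile and `node_modules` have to be regenerated before the UI image is rebuilt. A minimal sketch, assuming npm is the package manager in use and that the project keeps its usual `build` script; adjust for yarn or pnpm if that is what the repo actually uses:

```bash
# Reinstall the VisualQnA UI dependencies against the downgraded toolchain.
cd VisualQnA/ui/svelte
rm -rf node_modules package-lock.json
npm install

# Sanity-check that the UI still builds with the older svelte/vite/typescript versions.
npm run build
```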