HUGGINGFACEHUB_API_TOKEN environment variable is changed to HF_TOKEN
Fix opea-project#1467, opea-project#1485, opea-project#1486

For OPEA settings, please use HF_TOKEN in set_env.sh and in the documentation; each set_env.sh now fails fast when the token is missing, as sketched below.
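
A minimal sketch of that guard, as it appears in the set_env.sh hunks below (these scripts are sourced, so they use `return` rather than `exit` to hand a non-zero status back to the calling shell):

```bash
if [ -z "$HF_TOKEN" ]; then
    echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
    return -1
fi

export HF_TOKEN=${HF_TOKEN}
```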

The HUGGINGFACEHUB_API_TOKEN environment variable is used by the huggingface_hub library;
it was renamed to HF_TOKEN between versions 0.23.5 and 0.26.

https://huggingface.co/docs/huggingface_hub/en/package_reference/environment_variables#hftoken
https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#deprecated-environment-variables

The huggingface_hub versions in the TGI CPU image and the TGI Gaudi image are different.
Currently HUGGINGFACEHUB_API_TOKEN is used by the Xeon TGI 2.4.0-intel-cpu image (huggingface_hub 0.23.5),
while HF_TOKEN is used by ghcr.io/huggingface/tgi-gaudi:2.3.1 (huggingface_hub 0.26).
Besides, the TEI embedding images ship yet other versions.

During this stage, set both environment variables, HUGGINGFACEHUB_API_TOKEN
and HF_TOKEN, in each docker compose.yaml so that either version of the huggingface_hub library can read the token.
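
For example, a service's environment block now derives both names from the single HF_TOKEN setting (illustrative sketch; the service names and the legacy variable name, HUGGINGFACEHUB_API_TOKEN or HUGGING_FACE_HUB_TOKEN, vary per component, as the hunks below show):

```yaml
services:
  tgi-service:
    environment:
      # legacy name, read by huggingface_hub 0.23.5 in the older images
      HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
      # current name, read by huggingface_hub >= 0.26
      HF_TOKEN: ${HF_TOKEN}
```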

Fix opea-project#1485: align the Docker images that depend on the Gaudi 1.19 driver.

Signed-off-by: Wang, Xigui <[email protected]>
xiguiw committed Feb 8, 2025
1 parent 1b3291a commit 4f71873
Showing 69 changed files with 263 additions and 113 deletions.
2 changes: 1 addition & 1 deletion AudioQnA/docker_compose/intel/cpu/xeon/README.md
@@ -49,7 +49,7 @@ Before starting the services with `docker compose`, you have to recheck the foll

```bash
export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=<your HF token>
export HF_TOKEN=<your HF token>

export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

6 changes: 5 additions & 1 deletion AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -5,7 +5,11 @@

# export host_ip=<your External Public IP>
export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}

if [ -z "$HF_TOKEN" ]; then
echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
return -1
fi
# <token>

export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
2 changes: 1 addition & 1 deletion AudioQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -49,7 +49,7 @@ Before starting the services with `docker compose`, you have to recheck the foll

```bash
export host_ip=<your External Public IP> # export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=<your HF token>
export HF_TOKEN=<your HF token>

export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3

3 changes: 2 additions & 1 deletion AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -45,7 +45,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
8 changes: 7 additions & 1 deletion AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -5,7 +5,13 @@

# export host_ip=<your External Public IP>
export host_ip=$(hostname -I | awk '{print $1}')
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}

if [ -z "$HF_TOKEN" ]; then
echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
return -1
fi

export HF_TOKEN=${HF_TOKEN}
# <token>

export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
4 changes: 2 additions & 2 deletions AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
@@ -58,7 +58,7 @@ Then run the command `docker images`, you will have following images ready:
Before starting the services with `docker compose`, you have to recheck the following environment variables.

```bash
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export HF_TOKEN=<your_hf_token>
export host_ip=$(hostname -I | awk '{print $1}')

export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
@@ -173,7 +173,7 @@ In the current version v1.0, you need to set the avatar figure image/video and t
cd GenAIExamples/AvatarChatbot/tests
export IMAGE_REPO="opea"
export IMAGE_TAG="latest"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export HF_TOKEN=<your_hf_token>

test_avatarchatbot_on_xeon.sh
```
2 changes: 1 addition & 1 deletion AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
@@ -37,7 +37,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
healthcheck:
test: ["CMD-SHELL", "curl -f http://${host_ip}:3006/health || exit 1"]
interval: 10s
4 changes: 2 additions & 2 deletions AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
@@ -58,7 +58,7 @@ Then run the command `docker images`, you will have following images ready:
Before starting the services with `docker compose`, you have to recheck the following environment variables.

```bash
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export HF_TOKEN=<your_hf_token>
export host_ip=$(hostname -I | awk '{print $1}')

export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
@@ -183,7 +183,7 @@ In the current version v1.0, you need to set the avatar figure image/video and t
cd GenAIExamples/AvatarChatbot/tests
export IMAGE_REPO="opea"
export IMAGE_TAG="latest"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export HF_TOKEN=<your_hf_token>

test_avatarchatbot_on_gaudi.sh
```
5 changes: 3 additions & 2 deletions AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -38,7 +38,7 @@ services:
- SYS_NICE
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
image: ghcr.io/huggingface/tgi-gaudi:2.3.1
container_name: tgi-gaudi-server
ports:
- "3006:80"
@@ -48,7 +48,8 @@
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
4 changes: 2 additions & 2 deletions ChatQnA/docker_compose/intel/cpu/aipc/README.md
@@ -105,7 +105,7 @@ export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export HF_TOKEN=${your_hf_api_token}
export OLLAMA_HOST=${host_ip}
export OLLAMA_MODEL="llama3.2"
```
@@ -116,7 +116,7 @@ export OLLAMA_MODEL="llama3.2"
set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
set RERANK_MODEL_ID=BAAI/bge-reranker-base
set INDEX_NAME=rag-redis
set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
set HF_TOKEN=%your_hf_api_token%
set OLLAMA_HOST=host.docker.internal
set OLLAMA_MODEL="llama3.2"
```
9 changes: 6 additions & 3 deletions ChatQnA/docker_compose/intel/cpu/aipc/compose.yaml
@@ -24,7 +24,8 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
@@ -54,7 +55,8 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -70,7 +72,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
10 changes: 3 additions & 7 deletions ChatQnA/docker_compose/intel/cpu/aipc/set_env.sh
@@ -7,15 +7,11 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null

if [ -z "${your_hf_api_token}" ]; then
echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set your_hf_api_token."
if [ -z "${HF_TOKEN}" ]; then
echo "Error: HF_TOKEN is not set. Please set HF_TOKEN."
fi

if [ -z "${host_ip}" ]; then
echo "Error: host_ip is not set. Please set host_ip first."
fi

export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export host_ip=$(hostname -I | awk '{print $1}')
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
4 changes: 2 additions & 2 deletions ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -21,7 +21,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
export HF_TOKEN="Your_Huggingface_API_Token"
```

2. If you are in a proxy environment, also set the proxy-related environment variables:
@@ -228,7 +228,7 @@ For users in China who are unable to download models directly from Huggingface,
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
export HF_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}
```
9 changes: 6 additions & 3 deletions ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -24,7 +24,8 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
@@ -54,7 +55,8 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG}
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
restart: unless-stopped
@@ -70,7 +72,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
5 changes: 5 additions & 0 deletions ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -7,6 +7,11 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null

if [ -z "$HF_TOKEN" ]; then
echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
return -1
fi

export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
12 changes: 6 additions & 6 deletions ChatQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -21,7 +21,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
export HF_TOKEN="Your_Huggingface_API_Token"
```

2. If you are in a proxy environment, also set the proxy-related environment variables:
@@ -197,9 +197,9 @@ For users in China who are unable to download models directly from Huggingface,
export HF_ENDPOINT="https://hf-mirror.com"
model_name="meta-llama/Meta-Llama-3-8B-Instruct"
# Start vLLM LLM Service
docker run -p 8007:80 -v ./data:/data --name vllm-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
docker run -p 8007:80 -v ./data:/data --name vllm-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model $model_name --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
# Start TGI LLM Service
docker run -p 8005:80 -v ./data:/data --name tgi-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048
docker run -p 8005:80 -v ./data:/data --name tgi-gaudi-server -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id $model_name --max-input-tokens 1024 --max-total-tokens 2048
```

2. Offline
@@ -214,9 +214,9 @@
export HF_TOKEN=${your_hf_token}
export model_path="/path/to/model"
# Start vLLM LLM Service
docker run -p 8007:80 -v $model_path:/data --name vllm-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model /data --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
docker run -p 8007:80 -v $model_path:/data --name vllm-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e VLLM_TORCH_PROFILER_DIR="/mnt" --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model /data --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
# Start TGI LLM Service
docker run -p 8005:80 -v $model_path:/data --name tgi-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048
docker run -p 8005:80 -v $model_path:/data --name tgi-gaudi-server --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e HUGGING_FACE_HUB_TOKEN=$HF_TOKEN -e HF_TOKEN=$HF_TOKEN -e ENABLE_HPU_GRAPH=true -e LIMIT_HPU_GRAPH=true -e USE_FLASH_ATTENTION=true -e FLASH_ATTENTION_RECOMPUTE=true --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:2.0.6 --model-id /data --max-input-tokens 1024 --max-total-tokens 2048
```

### Setup Environment Variables
@@ -226,7 +226,7 @@ For users in China who are unable to download models directly from Huggingface,
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
export HF_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}
```
8 changes: 5 additions & 3 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -24,7 +24,8 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
@@ -54,7 +55,8 @@ services:
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:1.5.0
@@ -88,7 +90,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
4 changes: 4 additions & 0 deletions ChatQnA/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -6,6 +6,10 @@ pushd "../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null

if [ -z "$HF_TOKEN" ]; then
echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
return -1
fi

export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
2 changes: 1 addition & 1 deletion CodeGen/docker_compose/intel/cpu/xeon/README.md
@@ -101,7 +101,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen"
5 changes: 3 additions & 2 deletions CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -14,7 +14,7 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_TOKEN: ${HF_TOKEN}
host_ip: ${host_ip}
healthcheck:
test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
@@ -37,7 +37,8 @@
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
restart: unless-stopped
codegen-xeon-backend-server:
image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
2 changes: 1 addition & 1 deletion CodeGen/docker_compose/intel/hpu/gaudi/README.md
@@ -87,7 +87,7 @@ export http_proxy=${your_http_proxy}
export https_proxy=${your_http_proxy}
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export HF_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen"
6 changes: 4 additions & 2 deletions CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -15,7 +15,8 @@
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
@@ -45,7 +46,8 @@
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
HF_TOKEN: ${HF_TOKEN}
restart: unless-stopped
codegen-gaudi-backend-server:
image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
5 changes: 5 additions & 0 deletions CodeGen/docker_compose/set_env.sh
@@ -6,7 +6,12 @@ pushd "../../" > /dev/null
source .set_env.sh
popd > /dev/null

if [ -z "$HF_TOKEN" ]; then
echo "Error: The HF_TOKEN environment variable is **NOT** set. Please set it"
return -1
fi

export host_ip=$(hostname -I | awk '{print $1}')
export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct"
export TGI_LLM_ENDPOINT="http://${host_ip}:8028"
export MEGA_SERVICE_HOST_IP=${host_ip}