diff --git a/test/e2e/nemo-dependencies/install.yaml b/test/e2e/nemo-dependencies/install.yaml index 7d7d6fb7f..b10b2de54 100644 --- a/test/e2e/nemo-dependencies/install.yaml +++ b/test/e2e/nemo-dependencies/install.yaml @@ -92,3 +92,19 @@ Then open http://localhost:8888/ in your browser. If prompted for a token, use: {{ jupyter_token }} when: install.jupyter == true + +- hosts: localhost + vars_files: + - values.yaml + roles: + - role: rag + vars: + namespace: "{{ installation_namespace }}" + when: install.rag == true + tasks: + - name: RAG Details + debug: + msg: + - "RAG minio Password: {{ minio.password }}" + - "RAG minio Username: {{ minio.username }}" + when: install.rag == true \ No newline at end of file diff --git a/test/e2e/nemo-dependencies/rag/defaults/main.yml b/test/e2e/nemo-dependencies/rag/defaults/main.yml new file mode 100644 index 000000000..4d801a474 --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/defaults/main.yml @@ -0,0 +1,20 @@ +--- +# defaults file for rag +namespace: "rag" + +# Milvus Helm chart details +milvus: + enabled: true + helm_repo_name: "rag-milvus" + helm_repo_url: "https://zilliztech.github.io/milvus-helm/" + chart_name: "rag-milvus/milvus" # repo-alias/chart; the alias must equal helm_repo_name, the only repo this role adds + chart_version: "4.1.11" + +# Minio Helm chart details +minio: + enabled: true + helm_release_name: rag-minio + helm_oci_registry: oci://registry-1.docker.io/bitnamicharts/minio + username: minioadmin + password: minioadmin + diff --git a/test/e2e/nemo-dependencies/rag/handlers/main.yml b/test/e2e/nemo-dependencies/rag/handlers/main.yml new file mode 100644 index 000000000..194ac6fb9 --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/handlers/main.yml @@ -0,0 +1,2 @@ +--- +# handlers file for rag blueprint diff --git a/test/e2e/nemo-dependencies/rag/meta/main.yml b/test/e2e/nemo-dependencies/rag/meta/main.yml new file mode 100644 index 000000000..c572acc9f --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/meta/main.yml @@ -0,0 +1,52 @@ +galaxy_info: + author: your name + description: your
role description + company: your company (optional) + + # If the issue tracker for your role is not on github, uncomment the + # next line and provide a value + # issue_tracker_url: http://example.com/issue/tracker + + # Choose a valid license ID from https://spdx.org - some suggested licenses: + # - BSD-3-Clause (default) + # - MIT + # - GPL-2.0-or-later + # - GPL-3.0-only + # - Apache-2.0 + # - CC-BY-4.0 + license: license (GPL-2.0-or-later, MIT, etc) + + min_ansible_version: "2.1" + + # If this a Container Enabled role, provide the minimum Ansible Container version. + # min_ansible_container_version: + + # + # Provide a list of supported platforms, and for each platform a list of versions. + # If you don't wish to enumerate all versions for a particular platform, use 'all'. + # To view available platforms and versions (or releases), visit: + # https://galaxy.ansible.com/api/v1/platforms/ + # + # platforms: + # - name: Fedora + # versions: + # - all + # - 25 + # - name: SomePlatform + # versions: + # - all + # - 1.0 + # - 7 + # - 99.99 + + galaxy_tags: [] + # List tags for your role here, one per line. A tag is a keyword that describes + # and categorizes the role. Users find roles by searching for tags. Be sure to + # remove the '[]' above, if you add tags to this list. + # + # NOTE: A tag is limited to a single word comprised of alphanumeric characters. + # Maximum 20 tags per role. + +dependencies: [] + # List your role dependencies here, one per line. Be sure to remove the '[]' above, + # if you add dependencies to this list. 
diff --git a/test/e2e/nemo-dependencies/rag/tasks/local-path-provisioner.yaml b/test/e2e/nemo-dependencies/rag/tasks/local-path-provisioner.yaml new file mode 100644 index 000000000..b770a6df2 --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/tasks/local-path-provisioner.yaml @@ -0,0 +1,26 @@ +- name: Check if 'local-path-storage' namespace exists + shell: kubectl get namespace local-path-storage --no-headers + register: ns_check + ignore_errors: true + + +- name: Deploy local-path-storage if not already deployed + shell: > + kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/{{ localPathProvisioner.version }}/deploy/local-path-storage.yaml + when: ns_check.rc != 0 + +- name: Wait for local-path-provisioner deployment to be available + command: kubectl rollout status deployment/local-path-provisioner -n local-path-storage --timeout=120s + register: rollout_status + retries: 5 + delay: 10 + until: rollout_status.rc == 0 + when: ns_check.rc != 0 + +- name: Set 'local-path' as the default StorageClass + shell: | + kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' + register: sc_patch_result + changed_when: "'patched' in sc_patch_result.stdout" + failed_when: sc_patch_result.rc != 0 + when: localPathProvisioner.default | bool \ No newline at end of file diff --git a/test/e2e/nemo-dependencies/rag/tasks/main.yml b/test/e2e/nemo-dependencies/rag/tasks/main.yml new file mode 100644 index 000000000..a18a8eef8 --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/tasks/main.yml @@ -0,0 +1,9 @@ +--- +# tasks file for rag +- include_tasks: local-path-provisioner.yaml + when: localPathProvisioner.enabled +- include_tasks: namespace.yaml +- include_tasks: milvus.yaml + when: milvus.enabled +- include_tasks: minio.yaml + when: minio.enabled diff --git a/test/e2e/nemo-dependencies/rag/tasks/milvus.yaml b/test/e2e/nemo-dependencies/rag/tasks/milvus.yaml new file mode 
100644 index 000000000..831efc895 --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/tasks/milvus.yaml @@ -0,0 +1,95 @@ +--- +- name: Get Kube API resources + command: kubectl api-resources --verbs=list --namespaced -o name + register: api_resources + +- name: Check if the current cluster is OpenShift + set_fact: + is_openshift: "{{ 'routes.route.openshift.io' in api_resources.stdout_lines }}" + + +- name: OpenShift - Prepare RBAC to use anyuid SCC + ansible.builtin.template: + src: milvus-oc-rbac.yaml.j2 + dest: milvus-oc-rbac.yaml + when: is_openshift + +- name: OpenShift - apply RBAC to use anyuid SCC + command: kubectl apply -f milvus-oc-rbac.yaml + when: is_openshift + +- name: Add Helm repository for Milvus + command: helm repo add {{ milvus.helm_repo_name }} {{ milvus.helm_repo_url }} + +- name: Update Helm repositories + command: helm repo update + +- name: Template values file + ansible.builtin.template: + src: milvus-values.yaml.j2 + dest: milvus-values.yaml + +- name: OpenShift - configure Milvus to use its dedicated service account + blockinfile: + path: milvus-values.yaml + marker: "# {mark} ANSIBLE MANAGED BLOCK" + insertafter: "^(.*)$" + block: | + serviceAccount: + create: false + name: rag-milvus + when: is_openshift # the SA named in the block must be the one milvus-oc-rbac.yaml.j2 creates, since create is false + +- name: Install Milvus Helm chart + shell: > + helm upgrade --install {{ milvus.helm_repo_name }} + {{ milvus.chart_name }} + --namespace {{ namespace }} + --version {{ milvus.chart_version }} + --values milvus-values.yaml + register: helm_install_result + changed_when: "'STATUS: deployed' in helm_install_result.stdout" + +- name: Verify Milvus installation + command: kubectl get pods -n {{ namespace }} + register: pods + +- name: Wait for Milvus pod to be ready + command: kubectl wait --for=condition=Ready pod -n {{ namespace }} -l app.kubernetes.io/instance={{ milvus.helm_repo_name }} --timeout=300s + +- name: Get Milvus pod details + shell: | + kubectl get pods -n {{ namespace }} -l app.kubernetes.io/instance={{ milvus.helm_repo_name 
}} -o json + register: milvus_pods + changed_when: false + +- name: Get Milvus pod name + shell: | + kubectl get pods -n {{ namespace }} -l app.kubernetes.io/instance={{ milvus.helm_repo_name }} -o jsonpath='{.items[0].metadata.name}' + register: milvus_pod_name + failed_when: milvus_pod_name.stdout == "" + changed_when: false + +- name: Debug Milvus pod name + debug: + msg: "Milvus pod name is {{ milvus_pod_name.stdout }}" + +- name: Get the IP of the running Milvus pod + shell: | + kubectl get pod -n {{ namespace }} {{ milvus_pod_name.stdout }} -o jsonpath='{.status.podIP}' + register: milvus_pod_ip + failed_when: milvus_pod_ip.stdout == "" + changed_when: false + +- name: Debug Milvus Pod IP + debug: + msg: "Milvus Pod IP is {{ milvus_pod_ip.stdout }}" + when: milvus_pod_ip.stdout | length > 0 # test the captured stdout; the registered result itself is a dict and is never empty + +- name: Check Milvus service is responding + command: kubectl run milvus-check --image=busybox --restart=Never --attach --rm=true -- nc -w 10 -zv {{ milvus_pod_ip.stdout }} 19530 + register: milvus_status + +- name: Display Milvus connectivity status + debug: + msg: "{{ '✅ Milvus is accessible!' if milvus_status.rc == 0 else '❌ Milvus is not reachable!' 
}}" diff --git a/test/e2e/nemo-dependencies/rag/tasks/minio.yaml b/test/e2e/nemo-dependencies/rag/tasks/minio.yaml new file mode 100644 index 000000000..f9b85dabd --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/tasks/minio.yaml @@ -0,0 +1,76 @@ +--- +- name: Template values file + ansible.builtin.template: + src: minio-values.yaml.j2 + dest: minio-values.yaml + +- name: Install minio Helm chart + shell: > + helm upgrade --install {{ minio.helm_release_name }} + {{ minio.helm_oci_registry }} + --namespace {{ namespace }} + --values minio-values.yaml + register: helm_install_result + changed_when: "'STATUS: deployed' in helm_install_result.stdout" + +- name: Verify MinIO installation + command: kubectl get pods -n {{ namespace }} + register: pods + +- name: Check if mc binary exists in workspace + stat: + path: "./mc" + register: mc_binary + +- name: Download mc for Linux x86_64 + shell: | + curl -O https://dl.min.io/client/mc/release/linux-amd64/mc && + chmod +x mc + when: not mc_binary.stat.exists and ansible_system == 'Linux' and ansible_architecture == 'x86_64' + +- name: Download mc for Linux aarch64 + shell: | + curl -O https://dl.min.io/client/mc/release/linux-arm64/mc && + chmod +x mc + when: not mc_binary.stat.exists and ansible_system == 'Linux' and ansible_architecture == 'aarch64' + +- name: Download mc for macOS + shell: | + curl -O https://dl.min.io/client/mc/release/darwin-amd64/mc && + chmod +x mc + when: not mc_binary.stat.exists and ansible_system == 'Darwin' + +- name: Wait for MinIO pods to be ready + shell: | + kubectl get pods -n {{ namespace }} \ + | grep {{ minio.helm_release_name }} \ + | grep -v console \ + | awk '{print $1}' \ + | xargs kubectl get pod -n {{ namespace }} -o json + register: minio_pods + retries: 30 + delay: 10 + until: minio_pods.stdout | from_json | json_query("items[*].status.phase || [status.phase]") | unique == ['Running'] # kubectl emits a List for several names but a bare Pod for one; accept both shapes + failed_when: minio_pods.rc != 0 + +- name: Get the IP of the running MinIO pod + set_fact: + minio_pod_ip: "{{ 
minio_pods.stdout | from_json | json_query('items[0].status.podIP || status.podIP') }}" + +- name: Run validation to connect to MinIO + ignore_errors: true + shell: | + echo "Running MinIO validation script" + ./mc alias set myminio http://{{ minio_pod_ip }}:9000 {{ minio.username }} {{ minio.password }} --insecure + ./mc mb --ignore-existing myminio/testbucket --insecure + register: minio_validation_output + +- name: MinIO Connection status (Success) + debug: + msg: "Successfully connected and created test bucket on MinIO." + when: minio_validation_output.rc == 0 + +- name: MinIO Connection status (Failed) + debug: + msg: "Failed to connect and create test bucket on MinIO." + when: minio_validation_output.rc != 0 diff --git a/test/e2e/nemo-dependencies/rag/tasks/namespace.yaml b/test/e2e/nemo-dependencies/rag/tasks/namespace.yaml new file mode 100644 index 000000000..b09cd2a47 --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/tasks/namespace.yaml @@ -0,0 +1,10 @@ +- name: Check if the provided namespace exists + shell: kubectl get namespace {{ namespace }} --no-headers + register: ns_check + ignore_errors: true + + +- name: Create the provided namespace if not already exists + shell: > + kubectl create namespace {{ namespace }} + when: ns_check.rc != 0 diff --git a/test/e2e/nemo-dependencies/rag/tasks/uninstall.yaml b/test/e2e/nemo-dependencies/rag/tasks/uninstall.yaml new file mode 100644 index 000000000..d540c6da0 --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/tasks/uninstall.yaml @@ -0,0 +1,24 @@ +- name: Check if RAG dependencies are installed + shell: helm list -n {{ namespace }} | egrep 'rag-minio|rag-milvus' | awk '{print $1}' | wc -l | tr -d '\n' + register: rag_installed + ignore_errors: true + +- name: Uninstall Minio and Milvus helm charts + shell: helm list -n {{ namespace }} | awk '{print $1}' | grep -v NAME | egrep 'rag-minio|rag-milvus' | xargs helm del -n {{ namespace }} + ignore_errors: true + +- name: Delete RAG PVCs + shell: kubectl get pvc -n {{ namespace }} | egrep 'rag-milvus|rag-minio' | awk 
'{print $1}' | xargs kubectl delete pvc -n {{ namespace }} + ignore_errors: true + +- name: Delete Milvus SA + command: kubectl delete serviceaccount rag-milvus -n {{ namespace }} # object names below mirror templates/milvus-oc-rbac.yaml.j2 + ignore_errors: true + +- name: Delete Milvus role + command: kubectl delete role rag-scc-anyuid -n {{ namespace }} + ignore_errors: true + +- name: Delete Milvus rolebinding + command: kubectl delete rolebinding rag-milvus-scc-anyuid-binding -n {{ namespace }} + ignore_errors: true \ No newline at end of file diff --git a/test/e2e/nemo-dependencies/rag/templates/milvus-oc-rbac.yaml.j2 b/test/e2e/nemo-dependencies/rag/templates/milvus-oc-rbac.yaml.j2 new file mode 100644 index 000000000..79e9439e8 --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/templates/milvus-oc-rbac.yaml.j2 @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: rag-milvus + namespace: {{ namespace }} + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: rag-scc-anyuid + namespace: {{ namespace }} +rules: +- apiGroups: ['security.openshift.io'] + resources: ['securitycontextconstraints'] + verbs: ['use'] + resourceNames: ['anyuid'] + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: rag-milvus-scc-anyuid-binding + namespace: {{ namespace }} +subjects: +- kind: ServiceAccount + name: rag-milvus + namespace: {{ namespace }} +roleRef: + kind: Role + name: rag-scc-anyuid + apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/test/e2e/nemo-dependencies/rag/templates/milvus-values.yaml.j2 b/test/e2e/nemo-dependencies/rag/templates/milvus-values.yaml.j2 new file mode 100644 index 000000000..a32d7f27e --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/templates/milvus-values.yaml.j2 @@ -0,0 +1,30 @@ +serviceName: milvus + +cluster: + enabled: false +etcd: + enabled: false +pulsar: + enabled: false +minio: + enabled: false + tls: + enabled: false +standalone: + persistence: + enabled: true + persistentVolumeClaim: + size: 50Gi + 
storageClass: {{ '"local-path"' if localPathProvisioner.enabled else '""' }} + extraEnv: + - name: LOG_LEVEL + value: debug +extraConfigFiles: + user.yaml: |+ + etcd: + use: + embed: true + data: + dir: /var/lib/milvus/etcd + common: + storageType: local \ No newline at end of file diff --git a/test/e2e/nemo-dependencies/rag/templates/minio-values.yaml.j2 b/test/e2e/nemo-dependencies/rag/templates/minio-values.yaml.j2 new file mode 100644 index 000000000..30e28ff93 --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/templates/minio-values.yaml.j2 @@ -0,0 +1,10 @@ +auth: + rootUser: {{ minio.username | string | to_json }} # rendered as a quoted scalar so YAML specials or numeric-looking values stay strings + rootPassword: {{ minio.password | string | to_json }} +mode: standalone +persistence: + enabled: true + size: 1Gi + storageClass: {{ '"local-path"' if localPathProvisioner.enabled else '""' }} + annotations: + helm.sh/resource-policy: keep diff --git a/test/e2e/nemo-dependencies/rag/tests/inventory b/test/e2e/nemo-dependencies/rag/tests/inventory new file mode 100644 index 000000000..878877b07 --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/tests/inventory @@ -0,0 +1,2 @@ +localhost + diff --git a/test/e2e/nemo-dependencies/rag/tests/test.yml b/test/e2e/nemo-dependencies/rag/tests/test.yml new file mode 100644 index 000000000..1d11c3aaf --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/tests/test.yml @@ -0,0 +1,5 @@ +--- +- hosts: localhost + remote_user: root + roles: + - rag diff --git a/test/e2e/nemo-dependencies/rag/vars/main.yml b/test/e2e/nemo-dependencies/rag/vars/main.yml new file mode 100644 index 000000000..651c0b74a --- /dev/null +++ b/test/e2e/nemo-dependencies/rag/vars/main.yml @@ -0,0 +1,2 @@ +--- +# vars file for rag blueprint diff --git a/test/e2e/nemo-dependencies/uninstall.yaml b/test/e2e/nemo-dependencies/uninstall.yaml index 0c55c5f61..7232a82b5 100644 --- a/test/e2e/nemo-dependencies/uninstall.yaml +++ b/test/e2e/nemo-dependencies/uninstall.yaml @@ -57,3 +57,15 @@ vars: namespace: "{{ installation_namespace }}" when: uninstall.jupyter == true + +- hosts: 
localhost + vars_files: + - values.yaml + tasks: + - name: Uninstall RAG Dependencies + include_role: + name: rag + tasks_from: uninstall.yaml + vars: + namespace: "{{ installation_namespace }}" + when: uninstall.rag == true \ No newline at end of file diff --git a/test/e2e/nemo-dependencies/values.yaml b/test/e2e/nemo-dependencies/values.yaml index 252a543eb..92a5eb466 100644 --- a/test/e2e/nemo-dependencies/values.yaml +++ b/test/e2e/nemo-dependencies/values.yaml @@ -4,6 +4,7 @@ install: entity_store: yes evaluator: yes jupyter: yes + rag: yes uninstall: customizer: yes @@ -11,6 +12,7 @@ uninstall: entity_store: yes evaluator: yes jupyter: yes + rag: yes installation_namespace: nemo diff --git a/test/e2e/rag-server/Chart.lock b/test/e2e/rag-server/Chart.lock new file mode 100644 index 000000000..ee8a42a2a --- /dev/null +++ b/test/e2e/rag-server/Chart.lock @@ -0,0 +1,18 @@ +dependencies: +- name: k8s-nim-operator + repository: https://helm.ngc.nvidia.com/nvidia + version: 2.0.0 +- name: frontend + repository: "" + version: v2.1.0 +- name: zipkin + repository: https://zipkin.io/zipkin-helm + version: 0.1.2 +- name: opentelemetry-collector + repository: https://open-telemetry.github.io/opentelemetry-helm-charts + version: 0.78.1 +- name: kube-prometheus-stack + repository: https://prometheus-community.github.io/helm-charts + version: 69.7.2 +digest: sha256:290a7bf4f281ef2766ebb01042787a074753cfdf101a2af9cddce544f802fecb +generated: "2025-06-26T07:57:13.133181-07:00" diff --git a/test/e2e/rag-server/Chart.yaml b/test/e2e/rag-server/Chart.yaml new file mode 100644 index 000000000..d4331ae4d --- /dev/null +++ b/test/e2e/rag-server/Chart.yaml @@ -0,0 +1,19 @@ +apiVersion: v2 +appVersion: v2.1.0 +dependencies: +- condition: k8s-nim-operator.install + name: k8s-nim-operator + repository: https://helm.ngc.nvidia.com/nvidia + version: v2.0.0 +- condition: frontend.enabled + name: frontend + repository: "" + version: v2.1.0 +- condition: ingestor-server.enabled + name: 
ingestor-server + repository: "" + version: v2.1.0 +description: An end to end Helm chart for the NVIDIA RAG Blueprint +name: nvidia-rag-blueprint +type: application +version: v2.1.0 diff --git a/test/e2e/rag-server/LICENSE b/test/e2e/rag-server/LICENSE new file mode 100644 index 000000000..36ef90e5b --- /dev/null +++ b/test/e2e/rag-server/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2020 NVIDIA Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/test/e2e/rag-server/charts/frontend/.helmignore b/test/e2e/rag-server/charts/frontend/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/test/e2e/rag-server/charts/frontend/Chart.yaml b/test/e2e/rag-server/charts/frontend/Chart.yaml new file mode 100644 index 000000000..0f8b86be6 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +appVersion: v2.1.0 +description: A Helm chart for Kubernetes +name: frontend +type: application +version: v2.1.0 diff --git a/test/e2e/rag-server/charts/frontend/templates/NOTES.txt b/test/e2e/rag-server/charts/frontend/templates/NOTES.txt new file mode 100644 index 000000000..70ba37d49 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/templates/NOTES.txt @@ -0,0 +1,22 @@ +1. 
Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "frontend.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "frontend.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "frontend.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "frontend.name" . 
}},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:3000 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 3000:$CONTAINER_PORT +{{- end }} diff --git a/test/e2e/rag-server/charts/frontend/templates/_helpers.tpl b/test/e2e/rag-server/charts/frontend/templates/_helpers.tpl new file mode 100644 index 000000000..80618c3f6 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/templates/_helpers.tpl @@ -0,0 +1,69 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "frontend.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "frontend.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "frontend.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "frontend.labels" -}} +helm.sh/chart: {{ include "frontend.chart" . }} +{{ include "frontend.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "frontend.selectorLabels" -}} +app.kubernetes.io/name: {{ include "frontend.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Generate DockerConfigJson for image pull secrets +*/}} +{{- define "imagePullSecret" }} +{{- printf "{\"auths\":{\"%s\":{\"auth\":\"%s\"}}}" .Values.imagePullSecret.registry (printf "%s:%s" .Values.imagePullSecret.username .Values.imagePullSecret.password | b64enc) | b64enc }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "frontend.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "frontend.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/test/e2e/rag-server/charts/frontend/templates/deployment.yaml b/test/e2e/rag-server/charts/frontend/templates/deployment.yaml new file mode 100644 index 000000000..3cafed120 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/templates/deployment.yaml @@ -0,0 +1,66 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "frontend.fullname" . }} + labels: + {{- include "frontend.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "frontend.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "frontend.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + spec: + {{- if .Values.imagePullSecret.name }} + imagePullSecrets: + - name: {{ .Values.imagePullSecret.name }} + {{- end }} + serviceAccountName: {{ include "frontend.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + env: + {{- toYaml .Values.envVars | nindent 12 }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/test/e2e/rag-server/charts/frontend/templates/hpa.yaml b/test/e2e/rag-server/charts/frontend/templates/hpa.yaml new file mode 100644 index 000000000..535b34773 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/templates/hpa.yaml @@ -0,0 +1,32 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "frontend.fullname" . }} + labels: + {{- include "frontend.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "frontend.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/test/e2e/rag-server/charts/frontend/templates/ingress.yaml b/test/e2e/rag-server/charts/frontend/templates/ingress.yaml new file mode 100644 index 000000000..820eca4f1 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/templates/ingress.yaml @@ -0,0 +1,43 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "frontend.fullname" . }} + labels: + {{- include "frontend.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.ingress.className }} + ingressClassName: {{ . }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- with .pathType }} + pathType: {{ . 
}} + {{- end }} + backend: + service: + name: {{ include "frontend.fullname" $ }} + port: + number: {{ $.Values.service.port }} + {{- end }} + {{- end }} +{{- end }} diff --git a/test/e2e/rag-server/charts/frontend/templates/secrets.yaml b/test/e2e/rag-server/charts/frontend/templates/secrets.yaml new file mode 100644 index 000000000..f3d90d383 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/templates/secrets.yaml @@ -0,0 +1,15 @@ +{{- if .Values.imagePullSecret.create }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.imagePullSecret.name }} + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/managed-by: Helm + annotations: + meta.helm.sh/release-name: {{ .Release.Name }} + meta.helm.sh/release-namespace: {{ .Release.Namespace }} +type: kubernetes.io/dockerconfigjson +data: + .dockerconfigjson: {{ include "imagePullSecret" . | quote }} +{{- end }} diff --git a/test/e2e/rag-server/charts/frontend/templates/service.yaml b/test/e2e/rag-server/charts/frontend/templates/service.yaml new file mode 100644 index 000000000..708b0eda3 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "frontend.fullname" . }} + labels: + {{- include "frontend.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "frontend.selectorLabels" . | nindent 4 }} diff --git a/test/e2e/rag-server/charts/frontend/templates/serviceaccount.yaml b/test/e2e/rag-server/charts/frontend/templates/serviceaccount.yaml new file mode 100644 index 000000000..22facbdb0 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "frontend.serviceAccountName" . 
}} + labels: + {{- include "frontend.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/test/e2e/rag-server/charts/frontend/values.yaml b/test/e2e/rag-server/charts/frontend/values.yaml new file mode 100644 index 000000000..718ab9c78 --- /dev/null +++ b/test/e2e/rag-server/charts/frontend/values.yaml @@ -0,0 +1,114 @@ +replicaCount: 1 + +image: + repository: nvcr.io/nvidia/blueprint/rag-playground + pullPolicy: IfNotPresent + tag: "2.1.0" + +imagePullSecret: + name: "ngc-secret" + registry: "nvcr.io" + username: "$oauthtoken" + password: "" + +# This is to override the chart name. +nameOverride: "" +fullnameOverride: "" + +#This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +serviceAccount: + create: true + automount: true + annotations: {} + name: "" + +podAnnotations: {} +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +service: + type: NodePort + port: 3000 + +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +envVars: + - name: NEXT_PUBLIC_MODEL_NAME + value: "meta/llama-3.1-8b-instruct" + - name: NEXT_PUBLIC_EMBEDDING_MODEL + value: "nvidia/llama-3.2-nv-embedqa-1b-v2" + - name: NEXT_PUBLIC_RERANKER_MODEL + value: "nvidia/llama-3.2-nv-rerankqa-1b-v2" + - name: NEXT_PUBLIC_CHAT_BASE_URL + value: "http://rag-server:8081/v1" + - name: NEXT_PUBLIC_VDB_BASE_URL + value: 
"http://ingestor-server:8082/v1" + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: / + port: http +readinessProbe: + httpGet: + path: / + port: http + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + +volumes: [] +# - name: foo +# secret: +# secretName: mysecret +# optional: false + +# Additional volumeMounts on the output Deployment definition. +volumeMounts: [] +# - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +nodeSelector: {} + +tolerations: [] + +affinity: {} diff --git a/test/e2e/rag-server/charts/ingestor-server/Chart.lock b/test/e2e/rag-server/charts/ingestor-server/Chart.lock new file mode 100644 index 000000000..67ac4cf42 --- /dev/null +++ b/test/e2e/rag-server/charts/ingestor-server/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: nv-ingest + repository: https://helm.ngc.nvidia.com/nvidia/nemo-microservices/ + version: 25.4.2 +digest: sha256:f5966daa2ba3713a883791e002d60a00e1dfbfa14e31e421cc846c37e578d574 +generated: "2025-05-07T11:27:04.109994929+05:30" diff --git a/test/e2e/rag-server/charts/ingestor-server/Chart.yaml b/test/e2e/rag-server/charts/ingestor-server/Chart.yaml new file mode 100644 index 000000000..59b47b79b --- /dev/null +++ b/test/e2e/rag-server/charts/ingestor-server/Chart.yaml @@ -0,0 +1,10 @@ +apiVersion: v2 +appVersion: v2.1.0 +dependencies: +- name: nv-ingest + repository: https://helm.ngc.nvidia.com/nvidia/nemo-microservices/ + version: 25.4.2 +description: Helm chart for the NVIDIA RAG Blueprint Ingestor Server 
+name: ingestor-server +type: application +version: v2.1.0 diff --git a/test/e2e/rag-server/charts/ingestor-server/templates/_helpers.tpl b/test/e2e/rag-server/charts/ingestor-server/templates/_helpers.tpl new file mode 100644 index 000000000..a016d1a76 --- /dev/null +++ b/test/e2e/rag-server/charts/ingestor-server/templates/_helpers.tpl @@ -0,0 +1,6 @@ +{{/* +Generate DockerConfigJson for image pull secrets +*/}} +{{- define "imagePullSecret" }} +{{- printf "{\"auths\":{\"%s\":{\"auth\":\"%s\"}}}" .Values.imagePullSecret.registry (printf "%s:%s" .Values.imagePullSecret.username .Values.imagePullSecret.password | b64enc) | b64enc }} +{{- end }} diff --git a/test/e2e/rag-server/charts/ingestor-server/templates/deployment.yaml b/test/e2e/rag-server/charts/ingestor-server/templates/deployment.yaml new file mode 100644 index 000000000..e4206c32e --- /dev/null +++ b/test/e2e/rag-server/charts/ingestor-server/templates/deployment.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ingestor-server + labels: + app: ingestor-server +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + app: ingestor-server + template: + metadata: + labels: + app: ingestor-server + spec: + {{- if .Values.imagePullSecret.name }} + imagePullSecrets: + - name: {{ .Values.imagePullSecret.name }} + {{- end }} + containers: + - name: ingestor-server + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: + - "uvicorn" + - "src.ingestor_server.server:app" + - "--port" + - "8082" + - "--host" + - "0.0.0.0" + - "--workers" + - "{{ .Values.server.workers }}" + ports: + - containerPort: 8082 + {{- if .Values.envVars }} + {{- /* One env entry per key of the envVars map; quote escapes embedded double quotes so the rendered YAML stays valid. */}} + env: + {{- range $k, $v := .Values.envVars }} + - name: {{ $k | quote }} + value: {{ $v | quote }} + {{- end }} + {{- end }} + resources: +{{ toYaml .Values.resources | nindent 12 }} \ No newline at end of file diff --git 
a/test/e2e/rag-server/charts/ingestor-server/templates/secrets.yaml b/test/e2e/rag-server/charts/ingestor-server/templates/secrets.yaml new file mode 100644 index 000000000..d8e9fc5c3 --- /dev/null +++ b/test/e2e/rag-server/charts/ingestor-server/templates/secrets.yaml @@ -0,0 +1,15 @@ +{{- if .Values.imagePullSecret.create }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.imagePullSecret.name }} + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/managed-by: Helm + annotations: + meta.helm.sh/release-name: {{ .Release.Name }} + meta.helm.sh/release-namespace: {{ .Release.Namespace }} +type: kubernetes.io/dockerconfigjson +data: + .dockerconfigjson: {{ include "imagePullSecret" . | quote }} +{{- end }} \ No newline at end of file diff --git a/test/e2e/rag-server/charts/ingestor-server/templates/service.yaml b/test/e2e/rag-server/charts/ingestor-server/templates/service.yaml new file mode 100644 index 000000000..3008b42a8 --- /dev/null +++ b/test/e2e/rag-server/charts/ingestor-server/templates/service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: ingestor-server +spec: + selector: + app: ingestor-server + ports: + - protocol: TCP + port: 8082 + targetPort: 8082 + type: ClusterIP diff --git a/test/e2e/rag-server/charts/ingestor-server/values.yaml b/test/e2e/rag-server/charts/ingestor-server/values.yaml new file mode 100644 index 000000000..0621e4fba --- /dev/null +++ b/test/e2e/rag-server/charts/ingestor-server/values.yaml @@ -0,0 +1,165 @@ +replicaCount: 1 + +imagePullSecret: + create: false + name: "ngc-secret" + registry: "nvcr.io" + username: "$oauthtoken" + password: "" + +image: + repository: nvcr.io/nvidia/blueprint/ingestor-server + tag: "2.1.0" + pullPolicy: IfNotPresent + +server: + workers: 1 + +resources: + limits: + memory: "25Gi" + requests: + memory: "25Gi" + +envVars: + # === Vector Store Configurations === + APP_VECTORSTORE_URL: "http://milvus:19530" + APP_VECTORSTORE_NAME: "milvus" + 
APP_VECTORSTORE_SEARCHTYPE: "dense" + APP_VECTORSTORE_ENABLEGPUINDEX: "True" + APP_VECTORSTORE_ENABLEGPUSEARCH: "True" + COLLECTION_NAME: "multimodal_data" + + # === MinIO Configurations === + MINIO_ENDPOINT: "rag-minio:9000" + MINIO_ACCESSKEY: "minioadmin" + MINIO_SECRETKEY: "minioadmin" + + # === Embeddings Configurations === + APP_EMBEDDINGS_SERVERURL: "nemo-retriever-embedding-ms:8000" + APP_EMBEDDINGS_MODELNAME: "nvidia/llama-3.2-nv-embedqa-1b-v2" + APP_EMBEDDINGS_DIMENSIONS: "2048" + + # === NV-Ingest Configurations === + APP_NVINGEST_MESSAGECLIENTHOSTNAME: "rag-nv-ingest" + APP_NVINGEST_MESSAGECLIENTPORT: "7670" + + # === NV-Ingest extraction configurations === + APP_NVINGEST_PDFEXTRACTMETHOD: "None" # Method used for text extraction from "None", "pdfium", "nemoretriever_parse" + APP_NVINGEST_EXTRACTTEXT: "True" # Enable text extraction + APP_NVINGEST_EXTRACTTABLES: "True" # Enable table extraction + APP_NVINGEST_EXTRACTCHARTS: "True" # Enable chart extraction + APP_NVINGEST_EXTRACTIMAGES: "False" # Enable image extraction + APP_NVINGEST_TEXTDEPTH: "page" # Extract text by "page" or "document" + + # === NV-Ingest caption configurations === + APP_NVINGEST_CAPTIONMODELNAME: "meta/llama-3.2-11b-vision-instruct" # Model name for captioning + APP_NVINGEST_CAPTIONENDPOINTURL: "" # Endpoint URL for captioning model + + # === General === + ENABLE_CITATIONS: "True" + LOGLEVEL: "INFO" + + # === NV-Ingest splitting configurations === + APP_NVINGEST_CHUNKSIZE: "1024" # Size of chunks for splitting + APP_NVINGEST_CHUNKOVERLAP: "150" # Overlap size for chunks + APP_NVINGEST_ENABLEPDFSPLITTER: "True" # Enable PDF splitter + + # === Redis configurations === + REDIS_HOST: "rag-redis-master" + REDIS_PORT: "6379" + REDIS_DB: "0" + + # === Bulk upload to MinIO === + ENABLE_MINIO_BULK_UPLOAD: "False" + +# NV-Ingest +nv-ingest: + imagePullSecrets: + - name: "ngc-secret" + ngcApiSecret: + create: false + ngcImagePullSecret: + create: false + image: + repository: 
"nvcr.io/nvidia/nemo-microservices/nv-ingest" + tag: "25.4.1" + resources: + envVars: + INGEST_LOG_LEVEL: DEFAULT + INGEST_EDGE_BUFFER_SIZE: 64 + MRC_IGNORE_NUMA_CHECK: 1 + READY_CHECK_ALL_COMPONENTS: "true" + REDIS_MORPHEUS_TASK_QUEUE: morpheus_task_queue + NV_INGEST_DEFAULT_TIMEOUT_MS: "1234" + MAX_INGEST_PROCESS_WORKERS: 16 + EMBEDDING_NIM_ENDPOINT: "http://nemo-retriever-embedding-ms:8000/v1" + MESSAGE_CLIENT_HOST: "rag-redis-master" + MESSAGE_CLIENT_PORT: 6379 + MESSAGE_CLIENT_TYPE: "redis" + MINIO_INTERNAL_ADDRESS: "rag-minio:9000" + MILVUS_ENDPOINT: "http://milvus:19530" + OTEL_EXPORTER_OTLP_ENDPOINT: "otel-collector:4317" + MODEL_PREDOWNLOAD_PATH: "/workspace/models/" + + # WAR to fix -loadbalancer from the ingestion NIMs URLs + PADDLE_GRPC_ENDPOINT: nv-ingest-paddle:8001 + PADDLE_HTTP_ENDPOINT: http://nv-ingest-paddle:8000/v1/infer + PADDLE_INFER_PROTOCOL: grpc + YOLOX_GRPC_ENDPOINT: nemoretriever-page-elements-v2:8001 + YOLOX_HTTP_ENDPOINT: http://nemoretriever-page-elements-v2:8000/v1/infer + YOLOX_INFER_PROTOCOL: grpc + YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT: nemoretriever-graphic-elements-v1:8001 + YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT: http://nemoretriever-graphic-elements-v1:8000/v1/infer + YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL: grpc + YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT: nemoretriever-table-structure-v1:8001 + YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT: http://nemoretriever-table-structure-v1:8000/v1/infer + YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL: grpc + + paddleocr-nim: + image: + repository: nvcr.io/nim/baidu/paddleocr + tag: "1.2.0" + imagePullSecrets: + - name: ngc-secret + + nemoretriever-graphic-elements-v1: + image: + repository: nvcr.io/nim/nvidia/nemoretriever-graphic-elements-v1 + tag: "1.2.0" + + nemoretriever-page-elements-v2: + image: + repository: nvcr.io/nim/nvidia/nemoretriever-page-elements-v2 + tag: "1.2.0" + + nemoretriever-table-structure-v1: + image: + repository: nvcr.io/nim/nvidia/nemoretriever-table-structure-v1 + tag: "1.2.0" + + 
nim-vlm-text-extraction: + image: + repository: "nvcr.io/nim/nvidia/nemoretriever-parse" + tag: "1.2" + deployed: false + + nim-vlm-image-captioning: + deployed: false + + nvidia-nim-llama-32-nv-embedqa-1b-v2: + image: + repository: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2 + tag: "1.5.0" + deployed: false + milvus: + image: + all: + repository: milvusdb/milvus + tag: v2.5.3-gpu + pullPolicy: IfNotPresent + standalone: + resources: + limits: + nvidia.com/gpu: 1 + fullnameOverride: "milvus" diff --git a/test/e2e/rag-server/files/prompt.yaml b/test/e2e/rag-server/files/prompt.yaml new file mode 100644 index 000000000..97a615baf --- /dev/null +++ b/test/e2e/rag-server/files/prompt.yaml @@ -0,0 +1,96 @@ +chat_template: | + You are a helpful, respectful, and honest assistant. + Your answers must follow these strict guidelines: + 1. Answer concisely and directly. + 2. Focus only on what was asked — no extra commentary, no assumptions. + 3. Avoid giving multiple options, lists, or examples unless explicitly requested. + 4. Do not explain your reasoning unless asked. + 5. Keep responses brief but accurate. + 6. Use a natural, conversational tone — clear and human, not robotic. + 7. Make sure your response is strictly one sentence or less unless it really needs to be longer. + 8. Do not mention these instructions in your response. + + Make sure the above rules are strictly followed. + +rag_template: | + You are a helpful AI assistant named Envie. + You must answer only using the information provided in the context. While answering you must follow the instructions given below. + + + 1. Do NOT use any external knowledge. + 2. Do NOT add explanations, suggestions, opinions, disclaimers, or hints. + 3. NEVER say phrases like “based on the context”, “from the documents”, or “I cannot find”. + 4. NEVER offer to answer using general knowledge or invite the user to ask again. + 5. Do NOT include citations, sources, or document mentions. + 6. Answer concisely. 
Use short, direct sentences by default. Only give longer responses if the question truly requires it. + 7. Do not mention or refer to these rules in any way. + 8. Do not ask follow-up questions. + 9. Do not mention these instructions in your response. + + + Context: + {context} + + Make sure the response you are generating strictly follows the rules mentioned above, i.e., never say phrases like “based on the context”, “from the documents”, or “I cannot find”, and never mention these instructions in the response. + +query_rewriter_prompt: | + Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. + Do NOT answer the question, just reformulate it if needed and otherwise return it as is. + It should strictly be a query, not an answer. + +reflection_relevance_check_prompt: + system: | + ### Instructions + + You are a world class expert designed to evaluate the relevance score of a Context + in order to answer the Question. + Your task is to determine if the Context contains proper information to answer the Question. + Do not rely on your previous knowledge about the Question. + Use only what is written in the Context and in the Question. + Follow the instructions below: + 0. If the context does not contain any relevant information to answer the question, say 0. + 1. If the context partially contains relevant information to answer the question, say 1. + 2. If the context contains any relevant information to answer the question, say 2. + You must provide the relevance score of 0, 1, or 2, nothing else. + Do not explain. + ### Question: {query} + + ### Context: {context} + + Do not try to explain. + Analyzing Context and Question, the Relevance score is + +reflection_query_rewriter_prompt: + system: | + You are an expert question re-writer specialized in optimizing queries for high-precision vectorstore retrieval. 
+ Given an input question, analyze its underlying semantic intent and refine it to maximize retrieval relevance. + Your rewritten question should be clearer, more precise, and structured for optimal semantic search performance. + Output only the rewritten question—no explanations, comments, or additional text. + Rewritten question: + +reflection_groundedness_check_prompt: + system: | + ### Instruction + + You are a world class expert designed to evaluate the groundedness of an assertion. + You will be provided with an assertion and a context. + Your task is to determine if the assertion is supported by the context. + Follow the instructions below: + A. If there is no context or no assertion or context is empty or assertion is empty, say 0. + B. If the assertion is not supported by the context, say 0. + C. If the assertion is partially supported by the context, say 1. + D. If the assertion is fully supported by the context, say 2. + You must provide a rating of 0, 1, or 2, nothing else. + + ### Context: + <{context}> + + ### Assertion: + <{response}> + + Analyzing Context and Response, the Groundedness score is + +reflection_response_regeneration_prompt: + system: | + You are a helpful AI assistant. Generate a new response that is more grounded + in the provided context. Use only information that is explicitly supported by the context. 
\ No newline at end of file diff --git a/test/e2e/rag-server/templates/_helpers.tpl b/test/e2e/rag-server/templates/_helpers.tpl new file mode 100644 index 000000000..8437f89f3 --- /dev/null +++ b/test/e2e/rag-server/templates/_helpers.tpl @@ -0,0 +1,59 @@ +{{/* +Generate DockerConfigJson for image pull secrets +*/}} +{{- define "imagePullSecret" }} +{{- printf "{\"auths\":{\"%s\":{\"auth\":\"%s\"}}}" .Values.imagePullSecret.registry (printf "%s:%s" .Values.imagePullSecret.username .Values.imagePullSecret.password | b64enc) | b64enc }} +{{- end }} + +{{/* +Create secret to access NGC Api +*/}} +{{- define "ngcApiSecret" }} +{{- printf "%s" .Values.ngcApiSecret.password | b64enc }} +{{- end }} + +{{- define "generateGPUDPResource" }} +{{- if .migProfile -}} +nvidia.com/mig-{{ .migProfile }}: {{ .count | default 1 }} +{{- else -}} +nvidia.com/gpu: {{ .count | default 1 }} +{{- end -}} +{{- end }} + +{{/* +Merge GPU resources with existing resources, avoiding duplication +*/}} +{{- define "mergeResources" }} +{{- $existingResources := .existingResources }} +{{- $gpuConfig := .gpuConfig }} +{{- $draEnabled := .draEnabled }} +{{- $mergedResources := dict }} +{{- if $existingResources }} +{{- $mergedResources = deepCopy $existingResources }} +{{- end }} +{{- if and $gpuConfig (not $draEnabled) }} +{{- $gpuResourceKey := "" }} +{{- if $gpuConfig.migProfile }} +{{- $gpuResourceKey = printf "nvidia.com/mig-%s" $gpuConfig.migProfile }} +{{- else }} +{{- $gpuResourceKey = "nvidia.com/gpu" }} +{{- end }} +{{- if not (hasKey $mergedResources $gpuResourceKey) }} +{{- $_ := set $mergedResources $gpuResourceKey ($gpuConfig.count | default 1) }} +{{- end }} +{{- end }} +{{- toYaml $mergedResources }} +{{- end }} + +{{- define "generateGPUDRAResources" -}} +{{- $count := .count | default 1 -}} +{{- range $i := until ($count | int) }} +{{- if $.migProfile -}} +- resourceClaimTemplateName: mig-claim-template-{{ $.migProfile }} +{{- else -}} +- resourceClaimTemplateName: 
gpu-claim-template +{{- end -}} +{{- if lt (add $i 1) ($count | int) }} +{{ end }} +{{- end -}} +{{- end }} \ No newline at end of file diff --git a/test/e2e/rag-server/templates/configmap.yaml b/test/e2e/rag-server/templates/configmap.yaml new file mode 100644 index 000000000..f2e1c9027 --- /dev/null +++ b/test/e2e/rag-server/templates/configmap.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: prompt-config +data: + prompt.yaml: |- +{{ .Files.Get "files/prompt.yaml" | indent 4 }} diff --git a/test/e2e/rag-server/templates/gpu-claim-template.yaml b/test/e2e/rag-server/templates/gpu-claim-template.yaml new file mode 100644 index 000000000..1ab74f60f --- /dev/null +++ b/test/e2e/rag-server/templates/gpu-claim-template.yaml @@ -0,0 +1,16 @@ +{{- if .Values.dra.enabled }} +--- +apiVersion: resource.k8s.io/v1beta2 +kind: ResourceClaimTemplate +metadata: + name: gpu-claim-template +spec: + spec: + devices: + requests: + - exactly: + allocationMode: ExactCount + count: 1 + deviceClassName: gpu.nvidia.com + name: gpu +{{- end }} \ No newline at end of file diff --git a/test/e2e/rag-server/templates/mig-claim-template.yaml b/test/e2e/rag-server/templates/mig-claim-template.yaml new file mode 100644 index 000000000..29c5aae94 --- /dev/null +++ b/test/e2e/rag-server/templates/mig-claim-template.yaml @@ -0,0 +1,31 @@ +{{- if .Values.dra.enabled }} +{{- range .Values.dra.migClaims.profiles }} +--- +apiVersion: resource.k8s.io/v1beta2 +kind: ResourceClaimTemplate +metadata: + name: mig-claim-template-{{ . }} +spec: + spec: + devices: + config: + - requests: + - mig + opaque: + driver: gpu.nvidia.com + parameters: + apiVersion: resource.nvidia.com/v1beta1 + kind: MigDeviceConfig + sharing: + strategy: TimeSlicing + requests: + - exactly: + allocationMode: ExactCount + count: 1 + deviceClassName: mig.nvidia.com + selectors: + - cel: + expression: "device.attributes['gpu.nvidia.com'].profile == '{{ . 
}}'" + name: mig +{{- end }} +{{- end }} \ No newline at end of file diff --git a/test/e2e/rag-server/templates/nimpipeline.yaml b/test/e2e/rag-server/templates/nimpipeline.yaml new file mode 100644 index 000000000..61fedb5f1 --- /dev/null +++ b/test/e2e/rag-server/templates/nimpipeline.yaml @@ -0,0 +1,368 @@ +apiVersion: apps.nvidia.com/v1alpha1 +kind: NIMPipeline +metadata: + name: rag-blueprint +spec: + services: + - name: {{ .Values.llm.service.name }} + enabled: {{ .Values.llm.enabled }} + spec: + image: + repository: {{ .Values.llm.image.repository }} + tag: {{ .Values.llm.image.tag | quote }} + pullPolicy: {{ .Values.llm.image.pullPolicy }} + pullSecrets: + - {{ .Values.imagePullSecret.name }} + authSecret: {{ .Values.ngcApiSecret.name }} + storage: + pvc: + create: {{ .Values.llm.pvc.create }} + storageClass: {{ .Values.llm.pvc.storageClass }} + size: {{ .Values.llm.pvc.size }} + {{ with .Values.llm.pvc.annotations }} + annotations: + {{- toYaml . | nindent 14 }} + {{- end }} + volumeAccessMode: {{ .Values.llm.pvc.accessMode }} + replicas: {{ .Values.llm.replicas | default 1 }} + resources: + {{- $requestsResources := include "mergeResources" (dict "existingResources" .Values.llm.resources.requests "gpuConfig" .Values.llm.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $requestsResources }} + requests: + {{- toYaml $requestsResources | nindent 12 }} + {{- end }} + {{- $limitsResources := include "mergeResources" (dict "existingResources" .Values.llm.resources.limits "gpuConfig" .Values.llm.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $limitsResources }} + limits: + {{- toYaml $limitsResources | nindent 12 }} + {{- end }} + {{- if .Values.dra.enabled }} + draResources: + {{- include "generateGPUDRAResources" .Values.llm.gpu | nindent 8 }} + {{- end }} + expose: + service: + type: {{ .Values.llm.service.type | default "ClusterIP" }} + port: {{ .Values.llm.service.port | default 8000 }} + {{- if .Values.llm.autoScale.enabled }} + 
scale: + enabled: {{ .Values.llm.autoScale.enabled }} + hpa: + {{- toYaml .Values.llm.autoScale.hpa | nindent 12 }} + {{- end }} + env: + {{- with .Values.llm.env }} + {{- toYaml . | nindent 10 }} + {{- end }} + - name: {{ .Values.embedding.service.name }} + enabled: {{ default false .Values.embedding.enabled }} + spec: + image: + repository: {{ .Values.embedding.image.repository }} + tag: {{ .Values.embedding.image.tag | quote }} + pullPolicy: {{ .Values.embedding.image.pullPolicy }} + pullSecrets: + - {{ .Values.imagePullSecret.name }} + authSecret: {{ .Values.ngcApiSecret.name }} + storage: + pvc: + create: {{ default false .Values.embedding.pvc.create }} + storageClass: {{ .Values.embedding.pvc.storageClass }} + size: {{ .Values.embedding.pvc.size | quote }} + {{- with .Values.embedding.pvc.annotations }} + annotations: + {{- toYaml . | nindent 14 }} + {{- end }} + volumeAccessMode: {{ .Values.embedding.pvc.accessMode }} + replicas: {{ .Values.embedding.replicas | default 1 }} + resources: + {{- $requestsResources := include "mergeResources" (dict "existingResources" .Values.embedding.resources.requests "gpuConfig" .Values.embedding.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $requestsResources }} + requests: + {{- toYaml $requestsResources | nindent 12 }} + {{- end }} + {{- $limitsResources := include "mergeResources" (dict "existingResources" .Values.embedding.resources.limits "gpuConfig" .Values.embedding.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $limitsResources }} + limits: + {{- toYaml $limitsResources | nindent 12 }} + {{- end }} + {{- if .Values.dra.enabled }} + draResources: + {{- include "generateGPUDRAResources" .Values.embedding.gpu | nindent 8 }} + {{- end }} + expose: + service: + type: {{ .Values.embedding.service.type | default "ClusterIP" }} + port: {{ .Values.embedding.service.port | default 8000 }} + grpcPort: {{ .Values.embedding.service.grpcPort | default 8001 }} + metricsPort: {{ 
.Values.embedding.service.metricsPort | default 8002 }} + {{- if .Values.embedding.autoScale.enabled }} + scale: + enabled: {{ .Values.embedding.autoScale.enabled }} + hpa: + {{- toYaml .Values.embedding.autoScale.hpa | nindent 12 }} + {{- end }} + env: + {{- with .Values.embedding.env }} + {{- toYaml . | nindent 10 }} + {{- end }} + - name: {{ .Values.reranking.service.name }} + enabled: {{ default false .Values.reranking.enabled }} + spec: + image: + repository: {{ .Values.reranking.image.repository }} + tag: {{ .Values.reranking.image.tag | quote }} + pullPolicy: {{ .Values.reranking.image.pullPolicy }} + pullSecrets: + - {{ .Values.imagePullSecret.name }} + authSecret: {{ .Values.ngcApiSecret.name }} + storage: + pvc: + create: {{ default false .Values.reranking.pvc.create }} + storageClass: {{ .Values.reranking.pvc.storageClass }} + size: {{ .Values.reranking.pvc.size }} + {{- with .Values.reranking.pvc.annotations }} + annotations: + {{- toYaml . | nindent 14 }} + {{- end }} + volumeAccessMode: {{ .Values.reranking.pvc.accessMode }} + replicas: {{ .Values.reranking.replicas | default 1 }} + resources: + {{- $requestsResources := include "mergeResources" (dict "existingResources" .Values.reranking.resources.requests "gpuConfig" .Values.reranking.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $requestsResources }} + requests: + {{- toYaml $requestsResources | nindent 12 }} + {{- end }} + {{- $limitsResources := include "mergeResources" (dict "existingResources" .Values.reranking.resources.limits "gpuConfig" .Values.reranking.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $limitsResources }} + limits: + {{- toYaml $limitsResources | nindent 12 }} + {{- end }} + {{- if .Values.dra.enabled }} + draResources: + {{- include "generateGPUDRAResources" .Values.reranking.gpu | nindent 8 }} + {{- end }} + expose: + service: + type: {{ .Values.reranking.service.type | default "ClusterIP" }} + port: {{ .Values.reranking.service.port | default 
8000 }} + grpcPort: {{ .Values.reranking.service.grpcPort | default 8001 }} + metricsPort: {{ .Values.reranking.service.metricsPort | default 8002 }} + {{- if .Values.reranking.autoScale.enabled }} + scale: + enabled: {{ .Values.reranking.autoScale.enabled }} + hpa: + {{- toYaml .Values.reranking.autoScale.hpa | nindent 12 }} + {{- end }} + env: + {{- with .Values.reranking.env }} + {{- toYaml . | nindent 10 }} + {{- end }} + - name: {{ .Values.paddleOcr.service.name }} + enabled: {{ .Values.paddleOcr.enabled }} + spec: + image: + repository: {{ .Values.paddleOcr.image.repository }} + tag: {{ .Values.paddleOcr.image.tag | quote }} + pullPolicy: {{ .Values.paddleOcr.image.pullPolicy }} + pullSecrets: + - {{ .Values.imagePullSecret.name }} + authSecret: {{ .Values.ngcApiSecret.name }} + storage: + pvc: + create: {{ .Values.paddleOcr.pvc.create }} + storageClass: {{ .Values.paddleOcr.pvc.storageClass }} + size: {{ .Values.paddleOcr.pvc.size }} + {{ with .Values.paddleOcr.pvc.annotations }} + annotations: + {{- toYaml . 
| nindent 14 }} + {{- end }} + volumeAccessMode: {{ .Values.paddleOcr.pvc.accessMode }} + replicas: {{ .Values.paddleOcr.replicas | default 1 }} + resources: + {{- $requestsResources := include "mergeResources" (dict "existingResources" .Values.paddleOcr.resources.requests "gpuConfig" .Values.paddleOcr.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $requestsResources }} + requests: + {{- toYaml $requestsResources | nindent 12 }} + {{- end }} + {{- $limitsResources := include "mergeResources" (dict "existingResources" .Values.paddleOcr.resources.limits "gpuConfig" .Values.paddleOcr.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $limitsResources }} + limits: + {{- toYaml $limitsResources | nindent 12 }} + {{- end }} + {{- if .Values.dra.enabled }} + draResources: + {{- include "generateGPUDRAResources" .Values.paddleOcr.gpu | nindent 8 }} + {{- end }} + expose: + service: + type: {{ .Values.paddleOcr.service.type | default "ClusterIP" }} + port: {{ .Values.paddleOcr.service.port | default 8000 }} + grpcPort: {{ .Values.paddleOcr.service.grpcPort | default 8001 }} + metricsPort: {{ .Values.paddleOcr.service.metricsPort | default 8002 }} + {{- if .Values.paddleOcr.autoScale.enabled }} + scale: + enabled: {{ .Values.paddleOcr.autoScale.enabled }} + hpa: + {{- toYaml .Values.paddleOcr.autoScale.hpa | nindent 12 }} + {{- end }} + env: + {{- with .Values.paddleOcr.env }} + {{- toYaml . 
| nindent 10 }} + {{- end }} + - name: {{ .Values.graphicElements.service.name }} + enabled: {{ .Values.graphicElements.enabled }} + spec: + image: + repository: {{ .Values.graphicElements.image.repository }} + tag: {{ .Values.graphicElements.image.tag | quote }} + pullPolicy: {{ .Values.graphicElements.image.pullPolicy }} + pullSecrets: + - {{ .Values.imagePullSecret.name }} + authSecret: {{ .Values.ngcApiSecret.name }} + storage: + pvc: + create: {{ .Values.graphicElements.pvc.create }} + storageClass: {{ .Values.graphicElements.pvc.storageClass }} + size: {{ .Values.graphicElements.pvc.size }} + {{ with .Values.graphicElements.pvc.annotations }} + annotations: + {{- toYaml . | nindent 14 }} + {{- end }} + volumeAccessMode: {{ .Values.graphicElements.pvc.accessMode }} + replicas: {{ .Values.graphicElements.replicas | default 1 }} + resources: + {{- $requestsResources := include "mergeResources" (dict "existingResources" .Values.graphicElements.resources.requests "gpuConfig" .Values.graphicElements.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $requestsResources }} + requests: + {{- toYaml $requestsResources | nindent 12 }} + {{- end }} + {{- $limitsResources := include "mergeResources" (dict "existingResources" .Values.graphicElements.resources.limits "gpuConfig" .Values.graphicElements.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $limitsResources }} + limits: + {{- toYaml $limitsResources | nindent 12 }} + {{- end }} + {{- if .Values.dra.enabled }} + draResources: + {{- include "generateGPUDRAResources" .Values.graphicElements.gpu | nindent 8 }} + {{- end }} + expose: + service: + type: {{ .Values.graphicElements.service.type | default "ClusterIP" }} + port: {{ .Values.graphicElements.service.port | default 8000 }} + grpcPort: {{ .Values.graphicElements.service.grpcPort | default 8001 }} + metricsPort: {{ .Values.graphicElements.service.metricsPort | default 8002 }} + {{- if .Values.graphicElements.autoScale.enabled }} + scale: + 
enabled: {{ .Values.graphicElements.autoScale.enabled }} + hpa: + {{- toYaml .Values.graphicElements.autoScale.hpa | nindent 12 }} + {{- end }} + env: + {{- with .Values.graphicElements.env }} + {{- toYaml . | nindent 10 }} + {{- end }} + - name: {{ .Values.pageElements.service.name }} + enabled: {{ .Values.pageElements.enabled }} + spec: + image: + repository: {{ .Values.pageElements.image.repository }} + tag: {{ .Values.pageElements.image.tag | quote }} + pullPolicy: {{ .Values.pageElements.image.pullPolicy }} + pullSecrets: + - {{ .Values.imagePullSecret.name }} + authSecret: {{ .Values.ngcApiSecret.name }} + storage: + pvc: + create: {{ .Values.pageElements.pvc.create }} + storageClass: {{ .Values.pageElements.pvc.storageClass }} + size: {{ .Values.pageElements.pvc.size }} + {{ with .Values.pageElements.pvc.annotations }} + annotations: + {{- toYaml . | nindent 14 }} + {{- end }} + volumeAccessMode: {{ .Values.pageElements.pvc.accessMode }} + replicas: {{ .Values.pageElements.replicas | default 1 }} + resources: + {{- $requestsResources := include "mergeResources" (dict "existingResources" .Values.pageElements.resources.requests "gpuConfig" .Values.pageElements.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $requestsResources }} + requests: + {{- toYaml $requestsResources | nindent 12 }} + {{- end }} + {{- $limitsResources := include "mergeResources" (dict "existingResources" .Values.pageElements.resources.limits "gpuConfig" .Values.pageElements.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $limitsResources }} + limits: + {{- toYaml $limitsResources | nindent 12 }} + {{- end }} + {{- if .Values.dra.enabled }} + draResources: + {{- include "generateGPUDRAResources" .Values.pageElements.gpu | nindent 8 }} + {{- end }} + expose: + service: + type: {{ .Values.pageElements.service.type | default "ClusterIP" }} + port: {{ .Values.pageElements.service.port | default 8000 }} + grpcPort: {{ .Values.pageElements.service.grpcPort | default 
8001 }} + metricsPort: {{ .Values.pageElements.service.metricsPort | default 8002 }} + {{- if .Values.pageElements.autoScale.enabled }} + scale: + enabled: {{ .Values.pageElements.autoScale.enabled }} + hpa: + {{- toYaml .Values.pageElements.autoScale.hpa | nindent 12 }} + {{- end }} + env: + {{- with .Values.pageElements.env }} + {{- toYaml . | nindent 10 }} + {{- end }} + - name: {{ .Values.tableStructure.service.name }} + enabled: {{ .Values.tableStructure.enabled }} + spec: + image: + repository: {{ .Values.tableStructure.image.repository }} + tag: {{ .Values.tableStructure.image.tag | quote }} + pullPolicy: {{ .Values.tableStructure.image.pullPolicy }} + pullSecrets: + - {{ .Values.imagePullSecret.name }} + authSecret: {{ .Values.ngcApiSecret.name }} + storage: + pvc: + create: {{ .Values.tableStructure.pvc.create }} + storageClass: {{ .Values.tableStructure.pvc.storageClass }} + size: {{ .Values.tableStructure.pvc.size }} + {{ with .Values.tableStructure.pvc.annotations }} + annotations: + {{- toYaml . 
| nindent 14 }} + {{- end }} + volumeAccessMode: {{ .Values.tableStructure.pvc.accessMode }} + replicas: {{ .Values.tableStructure.replicas | default 1 }} + resources: + {{- $requestsResources := include "mergeResources" (dict "existingResources" .Values.tableStructure.resources.requests "gpuConfig" .Values.tableStructure.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $requestsResources }} + requests: + {{- toYaml $requestsResources | nindent 12 }} + {{- end }} + {{- $limitsResources := include "mergeResources" (dict "existingResources" .Values.tableStructure.resources.limits "gpuConfig" .Values.tableStructure.gpu "draEnabled" .Values.dra.enabled) | fromYaml }} + {{- if $limitsResources }} + limits: + {{- toYaml $limitsResources | nindent 12 }} + {{- end }} + {{- if .Values.dra.enabled }} + draResources: + {{- include "generateGPUDRAResources" .Values.tableStructure.gpu | nindent 8 }} + {{- end }} + expose: + service: + type: {{ .Values.tableStructure.service.type | default "ClusterIP" }} + port: {{ .Values.tableStructure.service.port | default 8000 }} + grpcPort: {{ .Values.tableStructure.service.grpcPort | default 8001 }} + metricsPort: {{ .Values.tableStructure.service.metricsPort | default 8002 }} + {{- if .Values.tableStructure.autoScale.enabled }} + scale: + enabled: {{ .Values.tableStructure.autoScale.enabled }} + hpa: + {{- toYaml .Values.tableStructure.autoScale.hpa | nindent 12 }} + {{- end }} + env: + {{- with .Values.tableStructure.env }} + {{- toYaml . 
| nindent 10 }} + {{- end }} diff --git a/test/e2e/rag-server/templates/rag-server.yaml b/test/e2e/rag-server/templates/rag-server.yaml new file mode 100644 index 000000000..bbda66b34 --- /dev/null +++ b/test/e2e/rag-server/templates/rag-server.yaml @@ -0,0 +1,55 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rag-server + labels: + app: rag-server +spec: + replicas: {{ .Values.ragServer.replicaCount }} + selector: + matchLabels: + app: rag-server + template: + metadata: + labels: + app: rag-server + spec: + {{- if .Values.ragServer.imagePullSecret }} + imagePullSecrets: + - name: {{ .Values.ragServer.imagePullSecret.name }} + {{- end }} + containers: + - name: rag-server + image: "{{ .Values.ragServer.image.repository }}:{{ .Values.ragServer.image.tag }}" + imagePullPolicy: {{ .Values.ragServer.image.pullPolicy }} + command: + - "uvicorn" + - "src.server:app" + - "--port" + - "8081" + - "--host" + - "0.0.0.0" + - "--workers" + - "{{ .Values.ragServer.server.workers }}" + ports: + - containerPort: 8081 + {{- if .Values.ragServer.envVars }} + env: + {{- range $k, $v := .Values.ragServer.envVars }} + - name: {{ $k | quote }} + value: {{ $v | quote }} + {{- end }} + {{- end }} + {{- if .Values.ragServer.resources }} + resources: +{{ toYaml .Values.ragServer.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: prompt-volume + mountPath: /prompt.yaml + subPath: prompt.yaml + volumes: + - name: prompt-volume + configMap: + name: prompt-config + defaultMode: 0555 \ No newline at end of file diff --git a/test/e2e/rag-server/templates/service.yaml b/test/e2e/rag-server/templates/service.yaml new file mode 100644 index 000000000..19d2fdd22 --- /dev/null +++ b/test/e2e/rag-server/templates/service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: rag-server +spec: + selector: + app: rag-server + ports: + - protocol: TCP + port: 8081 + targetPort: 8081 + type: ClusterIP diff --git 
a/test/e2e/rag-server/templates/servicemonitor.yaml b/test/e2e/rag-server/templates/servicemonitor.yaml new file mode 100644 index 000000000..992fe0d6a --- /dev/null +++ b/test/e2e/rag-server/templates/servicemonitor.yaml @@ -0,0 +1,24 @@ +{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ .Release.Name }}-opentelemetry-collector-monitor + labels: + release: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + component: standalone-collector +spec: + selector: + matchLabels: + app.kubernetes.io/name: opentelemetry-collector + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + component: standalone-collector + endpoints: + - port: metrics + interval: 15s + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} +{{- end }} \ No newline at end of file diff --git a/test/e2e/rag-server/values.yaml b/test/e2e/rag-server/values.yaml new file mode 100644 index 000000000..668bc441b --- /dev/null +++ b/test/e2e/rag-server/values.yaml @@ -0,0 +1,617 @@ +# Secrets +imagePullSecret: + name: "ngc-secret" + +ngcApiSecret: + name: "ngc-api" + +# Enable this if the nim-operator is not already installed in the cluster +k8s-nim-operator: + install: false + +# RAG server config +ragServer: + replicaCount: 1 + image: + repository: nvcr.io/nvidia/blueprint/rag-server + tag: "2.1.0" + pullPolicy: IfNotPresent + + server: + workers: 8 + + resources: + limits: + memory: "64Gi" + requests: + memory: "8Gi" + + envVars: + EXAMPLE_PATH: "src/" + PROMPT_CONFIG_FILE: "/prompt.yaml" + + ##===MINIO specific configurations which is used to store the multimodal base64 content=== + MINIO_ENDPOINT: "rag-minio:9000" + MINIO_ACCESSKEY: "minioadmin" + MINIO_SECRETKEY: "minioadmin" + + ##===Vector DB specific configurations=== + # URL on which vectorstore is hosted + APP_VECTORSTORE_URL: 
"http://rag-milvus:19530" + # Type of vectordb used to store embedding supported type milvus + APP_VECTORSTORE_NAME: "milvus" + # Type of vectordb search to be used + APP_VECTORSTORE_SEARCHTYPE: "dense" + # vectorstore collection name to store embeddings + COLLECTION_NAME: "multimodal_data" + APP_RETRIEVER_SCORETHRESHOLD: "0.25" + # Top K from vector DB, which goes as input to reranker model - not applicable if ENABLE_RERANKER is set to False + VECTOR_DB_TOPK: "100" + # Number of document chunks to insert in LLM prompt + APP_RETRIEVER_TOPK: "10" + + ##===LLM Model specific configurations=== + APP_LLM_MODELNAME: "nvidia/llama-3.1-8b-instruct" + # URL on which LLM model is hosted. If "", Nvidia hosted API is used + APP_LLM_SERVERURL: "nim-llm:8000" + + ##===Query Rewriter Model specific configurations=== + APP_QUERYREWRITER_MODELNAME: "nvidia/llama-3.1-8b-instruct" + # URL on which query rewriter model is hosted. If "", Nvidia hosted API is used + APP_QUERYREWRITER_SERVERURL: "nim-llm:8000" + + ##===Embedding Model specific configurations=== + # URL on which embedding model is hosted. If "", Nvidia hosted API is used + APP_EMBEDDINGS_SERVERURL: "nemo-retriever-embedding-ms:8000" + APP_EMBEDDINGS_MODELNAME: "nvidia/llama-3.2-nv-embedqa-1b-v2" + + ##===Reranking Model specific configurations=== + # URL on which ranking model is hosted. 
If "", Nvidia hosted API is used + APP_RANKING_SERVERURL: "nemo-retriever-reranking-ms:8000" + APP_RANKING_MODELNAME: "nvidia/llama-3.2-nv-rerankqa-1b-v2" + ENABLE_RERANKER: "True" + + # === Text Splitter === + APP_TEXTSPLITTER_CHUNKSIZE: "2000" + APP_TEXTSPLITTER_CHUNKOVERLAP: "200" + + # === General === + # Choose whether to enable citations in the response + ENABLE_CITATIONS: "True" + # Choose whether to enable/disable guardrails + ENABLE_GUARDRAILS: "False" + # Log level for server, supported levels: NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL + LOGLEVEL: "INFO" + # enable multi-turn conversation in the rag chain - this controls conversation history usage + # while doing query rewriting and in LLM prompt + ENABLE_MULTITURN: "True" + # enable query rewriting for multiturn conversation in the rag chain. + # This will improve accuracy of the retriever pipeline but increase latency due to an additional LLM call + ENABLE_QUERYREWRITER: "False" + # number of last n chat messages to consider from the provided conversation history + CONVERSATION_HISTORY: "5" + + # === Tracing === + APP_TRACING_ENABLED: "False" + # HTTP endpoint + APP_TRACING_OTLPHTTPENDPOINT: "http://rag-opentelemetry-collector:4318/v1/traces" + # GRPC endpoint + APP_TRACING_OTLPGRPCENDPOINT: "grpc://rag-opentelemetry-collector:4317" + + # === Reflection === + # enable reflection (context relevance and response groundedness checking) in the rag chain + ENABLE_REFLECTION: "False" + # Maximum number of context relevance loop iterations + MAX_REFLECTION_LOOP: "3" + # Minimum relevance score threshold (0-2) + CONTEXT_RELEVANCE_THRESHOLD: "1" + # Minimum groundedness score threshold (0-2) + RESPONSE_GROUNDEDNESS_THRESHOLD: "1" + # reflection llm + REFLECTION_LLM: "mistralai/mixtral-8x22b-instruct-v0.1" + # reflection llm server url. 
If "", Nvidia hosted API is used + REFLECTION_LLM_SERVERURL: "" + + # Choose whether to enable source metadata in document content during generation + ENABLE_SOURCE_METADATA: "true" + + # Whether to filter content within tags in model responses + FILTER_THINK_TOKENS: "true" + + # Whether to enable thinking in the rag chain for llama-3.3-nemotron-super-49b model + ENABLE_NEMOTRON_THINKING: "false" + +# Frontend +frontend: + enabled: true + replicaCount: 1 + image: + repository: nvcr.io/nvidia/blueprint/rag-playground + pullPolicy: IfNotPresent + tag: "2.1.0" + imagePullSecret: + name: "ngc-secret" + registry: "nvcr.io" + username: "$oauthtoken" + password: "" + service: + type: NodePort + port: 3000 + envVars: + - name: NEXT_PUBLIC_MODEL_NAME + value: "nvidia/llama-3.1-8b-instruct" + - name: NEXT_PUBLIC_EMBEDDING_MODEL + value: "nvidia/llama-3.2-nv-embedqa-1b-v2" + - name: NEXT_PUBLIC_RERANKER_MODEL + value: "nvidia/llama-3.2-nv-rerankqa-1b-v2" + - name: NEXT_PUBLIC_CHAT_BASE_URL + value: "http://rag-server:8081/v1" + +# Ingestor Server + # NOTE: nv-ingest assumes ngc-api secret exists in the namespace +ingestor-server: + enabled: true + milvusDeployed: false + redisDeployed: false + otelDeployed: false + zipkinDeployed: false + imagePullSecret: + name: "ngc-secret" + create: false + image: + repository: nvcr.io/nvidia/blueprint/ingestor-server + tag: "2.1.0" + pullPolicy: IfNotPresent + server: + workers: 1 + resources: + limits: + memory: "25Gi" + requests: + memory: "25Gi" + envVars: + # === Vector Store Configurations === + APP_VECTORSTORE_URL: "http://rag-milvus:19530" + APP_VECTORSTORE_NAME: "milvus" + APP_VECTORSTORE_SEARCHTYPE: "dense" + APP_VECTORSTORE_ENABLEGPUINDEX: "True" + APP_VECTORSTORE_ENABLEGPUSEARCH: "True" + COLLECTION_NAME: "multimodal_data" + + # === MinIO Configurations === + MINIO_ENDPOINT: "rag-minio:9000" + MINIO_ACCESSKEY: "minioadmin" + MINIO_SECRETKEY: "minioadmin" + + # === Embeddings Configurations === + APP_EMBEDDINGS_SERVERURL: 
"nemo-retriever-embedding-ms:8000" + APP_EMBEDDINGS_MODELNAME: "nvidia/llama-3.2-nv-embedqa-1b-v2" + APP_EMBEDDINGS_DIMENSIONS: "2048" + + # === NV-Ingest Configurations === + APP_NVINGEST_MESSAGECLIENTHOSTNAME: "rag-nv-ingest" + APP_NVINGEST_MESSAGECLIENTPORT: "7670" + + # === NV-Ingest extraction configurations === + APP_NVINGEST_PDFEXTRACTMETHOD: "None" # Method used for text extraction from "None", "pdfium", "nemoretriever_parse" + APP_NVINGEST_EXTRACTTEXT: "True" # Enable text extraction + APP_NVINGEST_EXTRACTTABLES: "True" # Enable table extraction + APP_NVINGEST_EXTRACTCHARTS: "True" # Enable chart extraction + APP_NVINGEST_EXTRACTIMAGES: "False" # Enable image extraction + APP_NVINGEST_TEXTDEPTH: "page" # Extract text by "page" or "document" + + # === NV-Ingest caption configurations === + APP_NVINGEST_CAPTIONMODELNAME: "meta/llama-3.2-11b-vision-instruct" # Model name for captioning + APP_NVINGEST_CAPTIONENDPOINTURL: "" # Endpoint URL for captioning model + + # === General === + ENABLE_CITATIONS: "True" + ENABLE_NV_INGEST_BATCH_MODE: "True" # Enable chunk ingestion for multi-user support + NV_INGEST_FILES_PER_BATCH: "128" # Number of documents to process in each chunk + LOGLEVEL: "INFO" + + # === NV-Ingest splitting configurations === + APP_NVINGEST_CHUNKSIZE: "512" # Size of chunks for splitting + APP_NVINGEST_CHUNKOVERLAP: "150" # Overlap size for chunks + APP_NVINGEST_ENABLEPDFSPLITTER: "True" # Enable PDF splitter + + # === Redis configurations === + REDIS_HOST: "rag-redis-master" + REDIS_PORT: "6379" + REDIS_DB: "0" + + # === Bulk upload to MinIO === + ENABLE_MINIO_BULK_UPLOAD: "False" + + # NV-Ingest + nv-ingest: + milvusDeployed: false + redisDeployed: false + otelDeployed: false + zipkinDeployed: false + milvus: + fullnameOverride: rag-milvus + imagePullSecrets: + - name: "ngc-secret" + ngcApiSecret: + create: false + ngcImagePullSecret: + create: false + image: + repository: "nvcr.io/nvidia/nemo-microservices/nv-ingest" + tag: "25.4.1" + envVars: 
+ NVIDIA_VISIBLE_DEVICES: "void" + INGEST_LOG_LEVEL: DEFAULT + NV_INGEST_MAX_UTIL: 48 + INGEST_EDGE_BUFFER_SIZE: 64 + MRC_IGNORE_NUMA_CHECK: 1 + READY_CHECK_ALL_COMPONENTS: "true" + REDIS_MORPHEUS_TASK_QUEUE: morpheus_task_queue + NV_INGEST_DEFAULT_TIMEOUT_MS: "1234" + MAX_INGEST_PROCESS_WORKERS: 16 + EMBEDDING_NIM_ENDPOINT: "http://nemo-retriever-embedding-ms:8000/v1" + MESSAGE_CLIENT_HOST: "rag-redis-master" + MESSAGE_CLIENT_PORT: 6379 + MESSAGE_CLIENT_TYPE: "redis" + MINIO_INTERNAL_ADDRESS: "rag-minio:9000" + MILVUS_ENDPOINT: "http://rag-milvus:19530" + OTEL_EXPORTER_OTLP_ENDPOINT: "otel-collector:4317" + MODEL_PREDOWNLOAD_PATH: "/workspace/models/" + + # WAR to fix -loadbalancer from the ingestion NIMs URLs + PADDLE_GRPC_ENDPOINT: nv-ingest-paddle:8001 + PADDLE_HTTP_ENDPOINT: http://nv-ingest-paddle:8000/v1/infer + PADDLE_INFER_PROTOCOL: grpc + YOLOX_GRPC_ENDPOINT: nemoretriever-page-elements-v2:8001 + YOLOX_HTTP_ENDPOINT: http://nemoretriever-page-elements-v2:8000/v1/infer + YOLOX_INFER_PROTOCOL: grpc + YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT: nemoretriever-graphic-elements-v1:8001 + YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT: http://nemoretriever-graphic-elements-v1:8000/v1/infer + YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL: grpc + YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT: nemoretriever-table-structure-v1:8001 + YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT: http://nemoretriever-table-structure-v1:8000/v1/infer + YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL: grpc + + paddleocr-nim: + deployed: false + + nemoretriever-graphic-elements-v1: + deployed: false + + nemoretriever-page-elements-v2: + deployed: false + + nemoretriever-table-structure-v1: + deployed: false + + nim-vlm-text-extraction: + deployed: false + + nim-vlm-image-captioning: + deployed: false + + nvidia-nim-llama-32-nv-embedqa-1b-v2: + deployed: false + +dra: + enabled: false + migClaims: + profiles: + - "1g.10gb" + - "3g.20gb" + +# NIMs +llm: + enabled: true + replicas: 1 + service: + name: "nim-llm" + type: ClusterIP + port: 8000 
+ image: + repository: nvcr.io/nim/meta/llama-3.1-8b-instruct + pullPolicy: IfNotPresent + tag: "1.3.3" + resources: + requests: {} + limits: {} + gpu: + count: 1 + migProfile: '' + model: + name: "nvidia/llama-3.1-8b-instruct" + pvc: + create: true + storageClass: "" + size: 50Gi + accessMode: ReadWriteMany + annotations: {} + autoScale: + enabled: true + hpa: + maxReplicas: 2 + minReplicas: 1 + metrics: + - type: Object + object: + metric: + name: gpu_cache_usage_perc + describedObject: + apiVersion: v1 + kind: Service + name: nim-llm + target: + type: Value + value: "0.5" + +embedding: + enabled: true + replicas: 1 + service: + name: "nemo-retriever-embedding-ms" + type: ClusterIP + port: 8000 + grpcPort: 8001 + metricsPort: 8002 + image: + repository: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2 + tag: "1.5.0" + pullPolicy: IfNotPresent + resources: + requests: {} + limits: {} + gpu: + count: 1 + migProfile: '3g.20gb' + pvc: + create: true + storageClass: "" + size: 50Gi + accessMode: ReadWriteMany + annotations: {} + autoScale: + enabled: false + hpa: + maxReplicas: 2 + minReplicas: 1 + metrics: + - type: Object + object: + metric: + name: gpu_cache_usage_perc + describedObject: + apiVersion: v1 + kind: Service + name: nemo-retriever-embedding-ms + target: + type: Value + value: "0.5" + +reranking: + enabled: true + replicas: 1 + env: + - name: NIM_MODEL_PROFILE + value: f7391ddbcb95b2406853526b8e489fedf20083a2420563ca3e65358ff417b10f + service: + name: "nemo-retriever-reranking-ms" + type: ClusterIP + port: 8000 + grpcPort: 8001 + metricsPort: 8002 + image: + repository: nvcr.io/nim/nvidia/llama-3.2-nv-rerankqa-1b-v2 + tag: "1.5.0" + pullPolicy: IfNotPresent + resources: + requests: {} + limits: {} + gpu: + count: 1 + migProfile: '3g.20gb' + pvc: + create: true + storageClass: "" + size: 50Gi + accessMode: ReadWriteMany + annotations: {} + autoScale: + enabled: false + hpa: + maxReplicas: 2 + minReplicas: 1 + metrics: + - type: Object + object: + metric: + 
name: gpu_cache_usage_perc + describedObject: + apiVersion: v1 + kind: Service + name: nemo-retriever-reranking-ms + target: + type: Value + value: "0.5" + +paddleOcr: + enabled: true + replicas: 1 + service: + name: "paddleocr-nim" + type: ClusterIP + port: 8000 + grpcPort: 8001 + metricsPort: 8002 + image: + repository: nvcr.io/nim/baidu/paddleocr + tag: "1.2.0" + pullPolicy: IfNotPresent + resources: + requests: {} + limits: {} + gpu: + count: 1 + migProfile: '1g.10gb' + pvc: + create: true + storageClass: "" + size: 50Gi + accessMode: ReadWriteMany + annotations: {} + autoScale: + enabled: false + hpa: + maxReplicas: 2 + minReplicas: 1 + metrics: + - type: Object + object: + metric: + name: gpu_cache_usage_perc + describedObject: + apiVersion: v1 + kind: Service + name: paddleocr-nim + target: + type: Value + value: "0.5" + +graphicElements: + enabled: true + replicas: 1 + service: + name: "nemoretriever-graphic-elements-v1" + type: ClusterIP + port: 8000 + grpcPort: 8001 + metricsPort: 8002 + image: + repository: nvcr.io/nim/nvidia/nemoretriever-graphic-elements-v1 + tag: "1.2.0" + pullPolicy: IfNotPresent + resources: + requests: {} + limits: {} + gpu: + count: 1 + migProfile: '1g.10gb' + pvc: + create: true + storageClass: "" + size: 50Gi + accessMode: ReadWriteMany + annotations: {} + autoScale: + enabled: false + hpa: + maxReplicas: 2 + minReplicas: 1 + metrics: + - type: Object + object: + metric: + name: gpu_cache_usage_perc + describedObject: + apiVersion: v1 + kind: Service + name: nemoretriever-graphic-elements-v1 + target: + type: Value + value: "0.5" + +pageElements: + enabled: true + replicas: 1 + service: + name: "nemoretriever-page-elements-v2" + type: ClusterIP + port: 8000 + grpcPort: 8001 + metricsPort: 8002 + image: + repository: nvcr.io/nim/nvidia/nemoretriever-page-elements-v2 + tag: "1.2.0" + pullPolicy: IfNotPresent + resources: + requests: {} + limits: {} + gpu: + count: 1 + migProfile: '1g.10gb' + pvc: + create: true + 
storageClass: "" + size: 50Gi + accessMode: ReadWriteMany + annotations: {} + autoScale: + enabled: false + hpa: + maxReplicas: 2 + minReplicas: 1 + metrics: + - type: Object + object: + metric: + name: gpu_cache_usage_perc + describedObject: + apiVersion: v1 + kind: Service + name: nemoretriever-page-elements-v2 + target: + type: Value + value: "0.5" + +tableStructure: + enabled: true + replicas: 1 + service: + name: "nemoretriever-table-structure-v1" + type: ClusterIP + port: 8000 + grpcPort: 8001 + metricsPort: 8002 + image: + repository: nvcr.io/nim/nvidia/nemoretriever-table-structure-v1 + tag: "1.2.0" + pullPolicy: IfNotPresent + resources: + requests: {} + limits: {} + gpu: + count: 1 + migProfile: '1g.10gb' + pvc: + create: true + storageClass: "" + size: 50Gi + accessMode: ReadWriteMany + annotations: {} + autoScale: + enabled: false + hpa: + maxReplicas: 2 + minReplicas: 1 + metrics: + - type: Object + object: + metric: + name: gpu_cache_usage_perc + describedObject: + apiVersion: v1 + kind: Service + name: nemoretriever-table-structure-v1 + target: + type: Value + value: "0.5" + +vlmTextExtraction: + enabled: false + +vlmImageCaptioning: + enabled: false + +## Observability Support +serviceMonitor: + enabled: false