diff --git a/Results/New Microsoft Word Document.docx b/Results/New Microsoft Word Document.docx
new file mode 100644
index 00000000..402a229c
Binary files /dev/null and b/Results/New Microsoft Word Document.docx differ
diff --git a/SOURCE_DOCUMENTS/Orca_paper.pdf b/SOURCE_DOCUMENTS/Orca_paper.pdf
deleted file mode 100644
index 6983cb08..00000000
Binary files a/SOURCE_DOCUMENTS/Orca_paper.pdf and /dev/null differ
diff --git a/SOURCE_DOCUMENTS/Saudi Sanitaryware Market.pdf b/SOURCE_DOCUMENTS/Saudi Sanitaryware Market.pdf
new file mode 100644
index 00000000..f39c2953
Binary files /dev/null and b/SOURCE_DOCUMENTS/Saudi Sanitaryware Market.pdf differ
diff --git a/chroma.sqlite3 b/chroma.sqlite3
new file mode 100644
index 00000000..7b78ee17
Binary files /dev/null and b/chroma.sqlite3 differ
diff --git a/constants.py b/constants.py
index cf27985d..07fdfa49 100644
--- a/constants.py
+++ b/constants.py
@@ -2,11 +2,12 @@
 # from dotenv import load_dotenv
 from chromadb.config import Settings
+# from faissdb.config import Settings

 # https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel
-from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
-from langchain.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader
-from langchain.document_loaders import UnstructuredHTMLLoader
+from langchain_community.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
+from langchain_community.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader
+from langchain_community.document_loaders import UnstructuredHTMLLoader

 # load_dotenv()
@@ -19,6 +20,9 @@
 MODELS_PATH = "./models"

+# INDEX_PATH = "faiss_index.index"
+# METADATA_PATH = "faiss_metadata.pkl"
+
 # Can be changed to a specific number
 INGEST_THREADS = os.cpu_count() or 8
@@ -59,7 +63,7 @@
 # Default Instructor Model
 EMBEDDING_MODEL_NAME = "hkunlp/instructor-large"  # Uses 1.5 GB of VRAM (High Accuracy with lower VRAM usage)
-
+# EMBEDDING_MODEL_NAME = 'TheBloke/Mistral-7B-Instruct-v0.1-GGUF'
 ####
 #### OTHER EMBEDDING MODEL OPTIONS
 ####
@@ -107,15 +111,18 @@
 # MODEL_BASENAME = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"

 # LLAMA 3 # use for Apple Silicon
-MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
-MODEL_BASENAME = None
+# MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
+# MODEL_ID = "TheBloke/Llama-2-7B-32K-Instruct-GPTQ"
+# MODEL_BASENAME = None  # "Llama-2-7B-32K-Instruct-GPTQ"
+# MODEL_BASENAME = "model.safetensors.awq"
+
 # LLAMA 3 # use for NVIDIA GPUs
 # MODEL_ID = "unsloth/llama-3-8b-bnb-4bit"
 # MODEL_BASENAME = None

-# MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
-# MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
+MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
+MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"

 # MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
 # MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
diff --git a/ingest.py b/ingest.py
index 5e61627e..9020a094 100644
--- a/ingest.py
+++ b/ingest.py
@@ -1,13 +1,17 @@
 import logging
 import os
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
+import faiss
+import pickle
+from transformers import AutoModel, AutoTokenizer

 import click
 import torch
 from langchain.docstore.document import Document
 from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
-from langchain.vectorstores import Chroma
+from langchain_community.vectorstores import Chroma, FAISS
 from utils import get_embeddings
+from langchain_community.docstore.in_memory import InMemoryDocstore

 from constants import (
     CHROMA_SETTINGS,
@@ -16,9 +20,13 @@
     INGEST_THREADS,
     PERSIST_DIRECTORY,
     SOURCE_DIRECTORY,
+    # INDEX_PATH,
+    # METADATA_PATH,
+
 )
+

 def file_log(logentry):
     file1 = open("file_ingest.log", "a")
     file1.write(logentry + "\n")
@@ -142,7 +150,30 @@ def split_documents(documents: list[Document]) -> tuple[list[Document], list[Doc
     ),
     help="Device to run on. (Default is cuda)",
 )
+def save_faiss_index(db, index_path, metadata_path):
+    faiss.write_index(db.index, index_path)
+    metadata = {
+        "index_to_docstore_id": db.index_to_docstore_id,
+        "docstore": db.docstore,
+    }
+    with open(metadata_path, "wb") as f:
+        pickle.dump(metadata, f)
+
+def load_faiss_index(index_path, metadata_path):
+    index = faiss.read_index(index_path)
+    with open(metadata_path, "rb") as f:
+        metadata = pickle.load(f)
+    docstore = metadata["docstore"]
+    index_to_docstore_id = metadata["index_to_docstore_id"]
+    db = FAISS(index=index,
+               docstore=docstore,
+               index_to_docstore_id=index_to_docstore_id)
+    return db
+
+
+
 def main(device_type):
+    print(f"Running on device: {device_type}")
     # Load documents and split in chunks
     logging.info(f"Loading documents from {SOURCE_DIRECTORY}")
     documents = load_documents(SOURCE_DIRECTORY)
@@ -161,6 +192,7 @@ def main(device_type):
     (2) Provides additional arguments for instructor and BGE models to improve results, pursuant to the instructions
     contained on their respective huggingface repository, project page or github repository.
+
     """

     embeddings = get_embeddings(device_type)
@@ -173,10 +205,78 @@ def main(device_type):
         persist_directory=PERSIST_DIRECTORY,
         client_settings=CHROMA_SETTINGS,
     )
+    # if os.path.exists(INDEX_PATH) and os.path.exists(METADATA_PATH):
+    #     db = load_faiss_index(INDEX_PATH, METADATA_PATH)
+    #     logging.info("Loaded FAISS index and metadata from disk.")
+    # else:
+
+    #     d = embeddings.shape[1]
+    #     index = faiss.IndexFlatL2(d)
+    #     index.add(embeddings)
+
+    #     docstore = InMemoryDocstore()
+    #     index_to_docstore_id = {i: doc["id"] for i, doc in enumerate(texts)}
+
+    #     db = FAISS(index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id)
+
+    #     save_faiss_index(db, INDEX_PATH, METADATA_PATH)
+    #     logging.info("Saved FAISS index and metadata to disk.")
+
+    # Load the model and tokenizer
+    # model_name = EMBEDDING_MODEL_NAME
+    # tokenizer = AutoTokenizer.from_pretrained(model_name)
+    # model = AutoModel.from_pretrained(model_name)
+    # # Tokenize the input texts
+    # inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
+    # # Get the embeddings from the model
+    # with torch.no_grad():
+    #     outputs = model(**inputs)
+    # # Extract the last hidden states (embeddings)
+    # embeddings = outputs.last_hidden_state
+    # # Pool the embeddings (e.g., mean pooling)
+    # pooled_embeddings = embeddings.mean(dim=1)
+    # # Convert the embeddings to a NumPy array
+    # numpy_embeddings = pooled_embeddings.cpu().numpy()
+
+    # # Get the dimension of the vectors
+    # vector_dimension = numpy_embeddings.shape[1]
+
+    # Create the FAISS index
+    # faiss_index = faiss.IndexFlatL2(vector_dimension)
+    # print(faiss_index.is_trained)
+    # # Add the embeddings to the index
+    # faiss_index.add(numpy_embeddings)
+    # # Save the index
+    # faiss.write_index(faiss_index, index_file_path)
+    # print(f"Index saved to {index_file_path}")
+    # print(faiss_index.ntotal)
+
+    # Define the directory and file name to save the index
+    # persist_dir = PERSIST_DIRECTORY
+    # index_file_path = os.path.join(persist_dir, 'faiss_index.index')
+
+    # # Load the index to verify
+    # faiss_index_loaded = faiss.read_index(index_file_path)
+    # print(f"Index loaded from {index_file_path}")
+
+    # Verify the loaded index
+    # print(f"Number of vectors in the loaded index: {faiss_index_loaded.ntotal}")
+
+    # db = FAISS.from_documents(
+    #     texts,
+    #     embeddings,
+    #     # persist_directory=PERSIST_DIRECTORY,
+    #     # client_settings=CHROMA_SETTINGS,
+    # )
+    # db.save_local("DB/faiss")
+import argparse

 if __name__ == "__main__":
     logging.basicConfig(
         format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)s - %(message)s", level=logging.INFO
     )
-    main()
+    parser = argparse.ArgumentParser(description="Ingest script for localGPT")
+    parser.add_argument("--device_type", type=str, required=True, help="Device type (cpu or gpu)")
+    args = parser.parse_args()
+    main(args.device_type)
diff --git a/localGPT_UI.py b/localGPT_UI.py
index 6a357e9c..09209b62 100644
--- a/localGPT_UI.py
+++ b/localGPT_UI.py
@@ -2,14 +2,16 @@
 import subprocess
 import streamlit as st
 from run_localGPT import load_model
-from langchain.vectorstores import Chroma
+from langchain_community.vectorstores import Chroma
 from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME
-from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
 from langchain.chains import RetrievalQA
-from streamlit_extras.add_vertical_space import add_vertical_space
+from streamlit_extras import add_vertical_space
 from langchain.prompts import PromptTemplate
 from langchain.memory import ConversationBufferMemory

+# Use the function in your Streamlit app
+add_vertical_space(10)

 def model_memory():
     # Adding history to the model.
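A note on the localGPT_UI.py hunk above: with "from streamlit_extras import add_vertical_space", the name is likely bound to the streamlit_extras.add_vertical_space submodule rather than to the function of the same name, so the module-level call add_vertical_space(10) would fail because a module object is not callable. A minimal sketch of the two ways the import and the call could be kept consistent (the intent of adding vertical space in the UI is assumed from the added comment):

    # Option A: import the function itself, as the file did before this change
    from streamlit_extras.add_vertical_space import add_vertical_space
    add_vertical_space(10)

    # Option B: keep the module import and call the function through it
    from streamlit_extras import add_vertical_space
    add_vertical_space.add_vertical_space(10)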
diff --git a/run_localGPT.py b/run_localGPT.py
index 185c983c..516e9faf 100644
--- a/run_localGPT.py
+++ b/run_localGPT.py
@@ -4,10 +4,18 @@
 import torch
 import utils
 from langchain.chains import RetrievalQA
-from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
 from langchain.llms import HuggingFacePipeline
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler  # for streaming response
 from langchain.callbacks.manager import CallbackManager
+from transformers import AutoModel, AutoTokenizer
+
+from langchain_community.embeddings import OpenAIEmbeddings
+from langchain_community.docstore.in_memory import InMemoryDocstore
+import faiss
+from langchain_community.vectorstores.faiss import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.docstore.in_memory import InMemoryDocstore

 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

@@ -15,7 +23,8 @@
 from utils import get_embeddings

 # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain.vectorstores import Chroma
+from langchain_community.vectorstores import Chroma, FAISS
+from langchain_community.vectorstores import chroma
 from transformers import (
     GenerationConfig,
     pipeline,
@@ -38,6 +47,9 @@
     CHROMA_SETTINGS,
 )

+# Check if CUDA is available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")

 def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
     """
@@ -132,14 +144,51 @@ def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"):
     logging.info(f"Loaded embeddings from {EMBEDDING_MODEL_NAME}")

     # load the vectorstore
-    db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
+    db = Chroma(persist_directory=PERSIST_DIRECTORY,
+                embedding_function=embeddings,
+                client_settings=CHROMA_SETTINGS
+                )
+
+
+    # print(embeddings)
+
+
+
+
+
+    # Initialize the FAISS index
+    # faiss_index = faiss.IndexFlatL2(768)
+
+    # # # Initialize the docstore
+    # docstore = InMemoryDocstore()
+    # # # Initialize the index_to_docstore_id
+    # index_to_docstore_id = {}
+    # # Add the embeddings to the index
+    # faiss_index.add(embeddings)
+    # Loading the saved embeddings
+    # db = FAISS.load_local("DB/faiss", embeddings, allow_dangerous_deserialization=True)
+    # db = FAISS(
+    #     embedding_function=embeddings,
+    #     index=faiss_index,
+    #     # docstore=docstore,
+    #     # index_to_docstore_id=index_to_docstore_id
+    # )
+
+    # # Add documents and their embeddings to the FAISS index and the docstore
+    # for i, (text, embedding) in enumerate(zip(df['Text'].tolist(), embeddings)):
+    #     db.add_document(doc_id=i, text=text, embedding=embedding)

     retriever = db.as_retriever()

     # get the prompt template and memory if set by the user.
-    prompt, memory = get_prompt_template(promptTemplate_type=promptTemplate_type, history=use_history)
+    prompt, memory = get_prompt_template(promptTemplate_type=promptTemplate_type,
+                                         history=use_history)

     # load the llm pipeline
     llm = load_model(device_type, model_id=MODEL_ID, model_basename=MODEL_BASENAME, LOGGING=logging)
+
+    # # Ensure the model is on CPU
+    # device = torch.device("cpu")
+    # llm.to(device)

     if use_history:
         qa = RetrievalQA.from_chain_type(
@@ -256,7 +305,9 @@ def main(device_type, show_sources, use_history, model_type, save_qa):
         if query == "exit":
             break
         # Get the answer from the chain
-        res = qa(query)
+        # res = qa(query)
+        res = qa.invoke(query)
+
         answer, docs = res["result"], res["source_documents"]

         # Print the result
diff --git a/run_localGPT_API.py b/run_localGPT_API.py
index b345612f..fff8d16d 100644
--- a/run_localGPT_API.py
+++ b/run_localGPT_API.py
@@ -7,14 +7,14 @@
 import torch
 from flask import Flask, jsonify, request
 from langchain.chains import RetrievalQA
-from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings

-# from langchain.embeddings import HuggingFaceEmbeddings
+# from langchain_community.embeddings import HuggingFaceEmbeddings
 from run_localGPT import load_model
 from prompt_template_utils import get_prompt_template

 # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain.vectorstores import Chroma
+from langchain_community.vectorstores import Chroma
 from werkzeug.utils import secure_filename

 from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME
diff --git a/utils.py b/utils.py
index 4e9fd820..b0e7fd91 100644
--- a/utils.py
+++ b/utils.py
@@ -2,9 +2,9 @@
 import csv
 from datetime import datetime
 from constants import EMBEDDING_MODEL_NAME
-from langchain.embeddings import HuggingFaceInstructEmbeddings
-from langchain.embeddings import HuggingFaceBgeEmbeddings
-from langchain.embeddings import HuggingFaceEmbeddings
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings


 def log_to_csv(question, answer):
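The FAISS path in ingest.py stays commented out in this diff, but the new save_faiss_index/load_faiss_index helpers could be wired up roughly as below. This is a minimal sketch rather than part of the change: it reuses the "faiss_index.index" and "faiss_metadata.pkl" paths from the commented-out INDEX_PATH/METADATA_PATH constants, and it passes the embedding object explicitly because the langchain_community FAISS constructor expects an embedding_function, which load_faiss_index as written does not supply.

    import faiss
    import pickle
    from langchain_community.vectorstores import FAISS

    # Ingest side: texts and embeddings are the chunked documents and the
    # embedding object already built in ingest.py's main(). Build the store,
    # then persist it with the helper added in this diff.
    db = FAISS.from_documents(texts, embeddings)
    save_faiss_index(db, "faiss_index.index", "faiss_metadata.pkl")

    # Query side (e.g. in retrieval_qa_pipline): rebuild the store and hand it
    # to the retriever instead of Chroma.
    index = faiss.read_index("faiss_index.index")
    with open("faiss_metadata.pkl", "rb") as f:
        metadata = pickle.load(f)
    db = FAISS(
        embedding_function=embeddings,
        index=index,
        docstore=metadata["docstore"],
        index_to_docstore_id=metadata["index_to_docstore_id"],
    )
    retriever = db.as_retriever()

With the argparse block added at the bottom of ingest.py, ingestion is now started with an explicit device flag, for example: python ingest.py --device_type cpu.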