diff --git a/CREATE EXTENSION IF NOT EXISTS vector;.sql b/CREATE EXTENSION IF NOT EXISTS vector;.sql
new file mode 100644
index 00000000..20b46353
--- /dev/null
+++ b/CREATE EXTENSION IF NOT EXISTS vector;.sql
@@ -0,0 +1 @@
+CREATE EXTENSION IF NOT EXISTS vector;
diff --git a/Results/New Microsoft Word Document.docx b/Results/New Microsoft Word Document.docx
new file mode 100644
index 00000000..70a47510
Binary files /dev/null and b/Results/New Microsoft Word Document.docx differ
diff --git a/SOURCE_DOCUMENTS/Orca_paper.pdf b/SOURCE_DOCUMENTS/Orca_paper.pdf
deleted file mode 100644
index 6983cb08..00000000
Binary files a/SOURCE_DOCUMENTS/Orca_paper.pdf and /dev/null differ
diff --git a/SOURCE_DOCUMENTS/Saudi Sanitaryware Market.pdf b/SOURCE_DOCUMENTS/Saudi Sanitaryware Market.pdf
new file mode 100644
index 00000000..f39c2953
Binary files /dev/null and b/SOURCE_DOCUMENTS/Saudi Sanitaryware Market.pdf differ
diff --git a/chroma.sqlite3 b/chroma.sqlite3
new file mode 100644
index 00000000..7b78ee17
Binary files /dev/null and b/chroma.sqlite3 differ
diff --git a/constants.py b/constants.py
index cf27985d..07fdfa49 100644
--- a/constants.py
+++ b/constants.py
@@ -2,11 +2,12 @@
 # from dotenv import load_dotenv
 from chromadb.config import Settings
+# from faissdb.config import Settings
 
 # https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel
-from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
-from langchain.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader
-from langchain.document_loaders import UnstructuredHTMLLoader
+from langchain_community.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
+from langchain_community.document_loaders import UnstructuredFileLoader, UnstructuredMarkdownLoader
+from langchain_community.document_loaders import UnstructuredHTMLLoader
 
 # load_dotenv()
@@ -19,6 +20,9 @@
 
 MODELS_PATH = "./models"
 
+# INDEX_PATH = "faiss_index.index"
+# METADATA_PATH = "faiss_metadata.pkl"
+
 # Can be changed to a specific number
 INGEST_THREADS = os.cpu_count() or 8
@@ -59,7 +63,7 @@
 # Default Instructor Model
 EMBEDDING_MODEL_NAME = "hkunlp/instructor-large"  # Uses 1.5 GB of VRAM (High Accuracy with lower VRAM usage)
-
+# EMBEDDING_MODEL_NAME = 'TheBloke/Mistral-7B-Instruct-v0.1-GGUF'
 ####
 #### OTHER EMBEDDING MODEL OPTIONS
 ####
@@ -107,15 +111,18 @@
 # MODEL_BASENAME = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"
 
 # LLAMA 3 # use for Apple Silicon
-MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
-MODEL_BASENAME = None
+# MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
+# MODEL_ID = "TheBloke/Llama-2-7B-32K-Instruct-GPTQ"
+# MODEL_BASENAME = None  # "Llama-2-7B-32K-Instruct-GPTQ"
+# MODEL_BASENAME = "model.safetensors.awq"
+
 # LLAMA 3 # use for NVIDIA GPUs
 # MODEL_ID = "unsloth/llama-3-8b-bnb-4bit"
 # MODEL_BASENAME = None
 
-# MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
-# MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
+MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
+MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
 
 # MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
 # MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
diff --git a/down.ipynb b/down.ipynb
new file mode 100644
index 00000000..e69de29b
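For reference, a quick way to confirm that the Mistral GGUF weights selected in `constants.py` actually resolve before running ingestion or inference. A minimal sketch, not part of the diff; it assumes `huggingface_hub` is installed, and reusing `MODELS_PATH` as the download cache is an assumption for illustration.

```python
# Sanity check (not part of the diff): resolve the GGUF weights configured in constants.py.
from huggingface_hub import hf_hub_download

from constants import MODEL_BASENAME, MODEL_ID, MODELS_PATH

if MODEL_BASENAME is not None:
    local_path = hf_hub_download(
        repo_id=MODEL_ID,          # "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
        filename=MODEL_BASENAME,   # "mistral-7b-instruct-v0.1.Q8_0.gguf"
        cache_dir=MODELS_PATH,     # "./models" (assumed cache location)
    )
    print(f"GGUF weights cached at: {local_path}")
```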
diff --git a/ingest.py b/ingest.py
index 5e61627e..c7c2229f 100644
--- a/ingest.py
+++ b/ingest.py
@@ -1,13 +1,26 @@
 import logging
 import os
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
+import faiss
+import pickle
+from transformers import AutoModel, AutoTokenizer
+import psycopg2
 
 import click
 import torch
 from langchain.docstore.document import Document
 from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
-from langchain.vectorstores import Chroma
+from langchain_community.vectorstores import Chroma, FAISS
 
 from utils import get_embeddings
+from langchain_community.docstore.in_memory import InMemoryDocstore
+
+from langchain_cohere import CohereEmbeddings
+# from langchain_core.documents import Document
+# from langchain_postgres import PGVector
+from langchain_postgres.vectorstores import PGVector
+
+from pgvector.psycopg import register_vector
+import psycopg
 
 from constants import (
     CHROMA_SETTINGS,
@@ -16,9 +29,13 @@
     INGEST_THREADS,
     PERSIST_DIRECTORY,
     SOURCE_DIRECTORY,
+    # INDEX_PATH,
+    # METADATA_PATH,
+
 )
 
+
 def file_log(logentry):
     file1 = open("file_ingest.log", "a")
     file1.write(logentry + "\n")
@@ -113,36 +130,59 @@ def split_documents(documents: list[Document]) -> tuple[list[Document], list[Doc
     return text_docs, python_docs
 
 
-@click.command()
-@click.option(
-    "--device_type",
-    default="cuda" if torch.cuda.is_available() else "cpu",
-    type=click.Choice(
-        [
-            "cpu",
-            "cuda",
-            "ipu",
-            "xpu",
-            "mkldnn",
-            "opengl",
-            "opencl",
-            "ideep",
-            "hip",
-            "ve",
-            "fpga",
-            "ort",
-            "xla",
-            "lazy",
-            "vulkan",
-            "mps",
-            "meta",
-            "hpu",
-            "mtia",
-        ],
-    ),
-    help="Device to run on. (Default is cuda)",
-)
+# @click.command()
+# @click.option(
+#     "--device_type",
+#     default="cuda" if torch.cuda.is_available() else "cpu",
+#     type=click.Choice(
+#         [
+#             "cpu",
+#             "cuda",
+#             "ipu",
+#             "xpu",
+#             "mkldnn",
+#             "opengl",
+#             "opencl",
+#             "ideep",
+#             "hip",
+#             "ve",
+#             "fpga",
+#             "ort",
+#             "xla",
+#             "lazy",
+#             "vulkan",
+#             "mps",
+#             "meta",
+#             "hpu",
+#             "mtia",
+#         ],
+#     ),
+#     help="Device to run on. (Default is cuda)",
+# )
+# def save_faiss_index(db, index_path, metadata_path):
+#     faiss.write_index(db.index, index_path)
+#     metadata = {
+#         "index_to_docstore_id": db.index_to_docstore_id,
+#         "docstore": db.docstore,
+#     }
+#     with open(metadata_path, "wb") as f:
+#         pickle.dump(metadata, f)
+
+# def load_faiss_index(index_path, metadata_path):
+#     index = faiss.read_index(index_path)
+#     with open(metadata_path, "rb") as f:
+#         metadata = pickle.load(f)
+#     docstore = metadata["docstore"]
+#     index_to_docstore_id = metadata["index_to_docstore_id"]
+#     db = FAISS(index=index,
+#                docstore=docstore,
+#                index_to_docstore_id=index_to_docstore_id)
+#     return db
+device_type = 'cpu'
+
+
 def main(device_type):
+    print(f"Running on device: {device_type}")
     # Load documents and split in chunks
     logging.info(f"Loading documents from {SOURCE_DIRECTORY}")
     documents = load_documents(SOURCE_DIRECTORY)
@@ -161,22 +201,136 @@ def main(device_type):
     (2) Provides additional arguments for instructor and BGE models to improve results, pursuant to the instructions
         contained on their respective huggingface repository, project page or github repository.
+
     """
     embeddings = get_embeddings(device_type)
     logging.info(f"Loaded embeddings from {EMBEDDING_MODEL_NAME}")
 
-    db = Chroma.from_documents(
-        texts,
-        embeddings,
-        persist_directory=PERSIST_DIRECTORY,
-        client_settings=CHROMA_SETTINGS,
+    # # See docker command above to launch a postgres instance with pgvector enabled.
+    connection = "postgresql+psycopg://postgres:123456@localhost:5432/postgres"  # Uses psycopg3!
+ # # "dbname=postgres user=postgres password=123456 host=localhost port=5432" + # connection.execute('CREATE EXTENSION IF NOT EXISTS vector') + # register_vector(connection) + + # connection.execute('DROP TABLE IF EXISTS documents') + # connection.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(384))') + + collection_name = "PG_VECTOR_SAudi" + # embeddings = CohereEmbeddings() +# ------------------------------- +# Q +#-------------------------------- + # connection = psycopg2.connect("dbname=postgres user=postgres password=123456 host=localhost port=5432") + # connection = connection.cursor() + # db = PGVector( + # documents= texts, + # embeddings=embeddings, + # collection_name=collection_name, + # connection=connection, + # use_jsonb=True, + # ) + # db.add_documents(texts, ids=[doc.metadata["id"] for doc in texts]) + # "dbname=postgres user=postgres password=123456 host=localhost port=5432" + #changing to more programatically conn string + db = PGVector.from_documents( + documents= texts, + embedding=embeddings, + collection_name=collection_name, + connection=connection, + use_jsonb=True, ) + print(">>>>>>>>/n/n>>>>>>>>>>Connected AND Loaded to the database successfully!") + + # collection_name = "PG_VECTOR_SAudi" + # db = PGVector.from_documents( + # embedding=embeddings, + # documents=texts, + # connection_string=CONNECTION_STRING, + # collection_name=collection_name, + # ) +# ------------------------------- +# Q +#-------------------------------- + # db = Chroma.from_documents( + # texts, + # embeddings, + # persist_directory=PERSIST_DIRECTORY, + # client_settings=CHROMA_SETTINGS, + # ) + # if os.path.exists(INDEX_PATH) and os.path.exists(METADATA_PATH): + # db = load_faiss_index(INDEX_PATH, METADATA_PATH) + # logging.info("Loaded FAISS index and metadata from disk.") + # else: + + # d = embeddings.shape[1] + # index = faiss.IndexFlatL2(d) + # index.add(embeddings) + + # docstore = InMemoryDocstore() + # index_to_docstore_id = {i: doc["id"] for i, doc in enumerate(texts)} + + # db = FAISS(index=index, docstore=docstore, index_to_docstore_id=index_to_docstore_id) + + # save_faiss_index(db, INDEX_PATH, METADATA_PATH) + # logging.info("Saved FAISS index and metadata to disk.") + + # Load the model and tokenizer + # model_name = EMBEDDING_MODEL_NAME + # tokenizer = AutoTokenizer.from_pretrained(model_name) + # model = AutoModel.from_pretrained(model_name) + # # Tokenize the input texts + # inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt") + # # Get the embeddings from the model + # with torch.no_grad(): + # outputs = model(**inputs) + # # Extract the last hidden states (embeddings) + # embeddings = outputs.last_hidden_state + # # Pool the embeddings (e.g., mean pooling) + # pooled_embeddings = embeddings.mean(dim=1) + # # Convert the embeddings to a NumPy array + # numpy_embeddings = pooled_embeddings.cpu().numpy() + + # # Get the dimension of the vectors + # vector_dimension = numpy_embeddings.shape[1] + + # Create the FAISS index + # faiss_index = faiss.IndexFlatL2(vector_dimension) + # print(faiss_index.is_trained) + # # Add the embeddings to the index + # faiss_index.add(numpy_embeddings) + # # Save the index + # faiss.write_index(faiss_index, index_file_path) + # print(f"Index saved to {index_file_path}") + # print(faiss_index.ntotal) + + # Define the directory and file name to save the index + # persist_dir = PERSIST_DIRECTORY + # index_file_path = os.path.join(persist_dir, 'faiss_index.index') + + # # Load the 
diff --git a/localGPT_UI.py b/localGPT_UI.py
index 6a357e9c..09209b62 100644
--- a/localGPT_UI.py
+++ b/localGPT_UI.py
@@ -2,14 +2,16 @@
 import subprocess
 import streamlit as st
 from run_localGPT import load_model
-from langchain.vectorstores import Chroma
+from langchain_community.vectorstores import Chroma
 from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME
-from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
 from langchain.chains import RetrievalQA
-from streamlit_extras.add_vertical_space import add_vertical_space
+from streamlit_extras import add_vertical_space
 from langchain.prompts import PromptTemplate
 from langchain.memory import ConversationBufferMemory
 
+# Use the function in your Streamlit app (imported as a module above, so call it via the module)
+add_vertical_space.add_vertical_space(10)
 
 def model_memory():
     # Adding history to the model.
diff --git a/postgres.session.sql b/postgres.session.sql
new file mode 100644
index 00000000..47fc9a40
--- /dev/null
+++ b/postgres.session.sql
@@ -0,0 +1,2 @@
+CREATE EXTENSION IF NOT EXISTS vector;
+-- CREATE EXTENSION vector WITH SCHEMA public;
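Before wiring the retriever to Postgres, it can help to confirm that the `vector` extension created by `postgres.session.sql` is actually active. A minimal check, not part of the diff, using the same psycopg (v3) and pgvector imports the scripts add.

```python
# Optional check (not part of the diff): verify the pgvector extension and type adapters.
import psycopg
from pgvector.psycopg import register_vector

conninfo = "dbname=postgres user=postgres password=123456 host=localhost port=5432"
with psycopg.connect(conninfo) as conn:
    conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
    register_vector(conn)  # maps the Postgres vector type to Python/numpy values
    row = conn.execute("SELECT extversion FROM pg_extension WHERE extname = 'vector'").fetchone()
    print(f"pgvector extension version: {row[0] if row else 'not installed'}")
```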
diff --git a/run_localGPT.py b/run_localGPT.py
index 185c983c..c6c32fa4 100644
--- a/run_localGPT.py
+++ b/run_localGPT.py
@@ -4,10 +4,18 @@
 import torch
 import utils
 from langchain.chains import RetrievalQA
-from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
 from langchain.llms import HuggingFacePipeline
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler  # for streaming response
 from langchain.callbacks.manager import CallbackManager
+from transformers import AutoModel, AutoTokenizer
+
+from langchain_community.embeddings import OpenAIEmbeddings
+from langchain_community.docstore.in_memory import InMemoryDocstore
+import faiss
+from langchain_community.vectorstores.faiss import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.docstore.in_memory import InMemoryDocstore
 
 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
@@ -15,12 +23,26 @@
 from utils import get_embeddings
 
 # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain.vectorstores import Chroma
+from langchain_community.vectorstores import Chroma, FAISS
+from langchain_community.vectorstores import chroma
 from transformers import (
     GenerationConfig,
     pipeline,
 )
+from langchain_cohere import CohereEmbeddings
+from langchain_core.documents import Document
+from langchain_postgres import PGVector
+from langchain_postgres.vectorstores import PGVector
+
+from pgvector.psycopg2 import register_vector
+import psycopg
+import psycopg2
+
+from langchain_community.llms import OpenAI
+from langchain_community.utilities import SQLDatabase
+# SQLDatabaseChain
+
 from load_models import (
     load_quantized_model_awq,
     load_quantized_model_gguf_ggml,
@@ -38,6 +60,9 @@
     CHROMA_SETTINGS,
 )
 
+# Check if CUDA is available
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
 
 def load_model(device_type, model_id, model_basename=None, LOGGING=logging):
     """
@@ -130,23 +155,140 @@ def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="llama"):
     embeddings = get_embeddings(device_type)
     logging.info(f"Loaded embeddings from {EMBEDDING_MODEL_NAME}")
 
+    # load the llm pipeline
+    llm = load_model(device_type, model_id=MODEL_ID, model_basename=MODEL_BASENAME, LOGGING=logging)
+
+
+    # https://api.pgxn.org/src/vector/vector-0.7.0/sql/vector--0.6.2--0.7.0.sql
+    # See docker command above to launch a postgres instance with pgvector enabled.
+    # connection = "postgresql+psycopg://postgres:123456@localhost:5432/postgres"  # Uses psycopg3!
+
+
+    connection = psycopg2.connect("dbname=postgres user=postgres password=123456 host=localhost port=5432")
+    print(">>>>>>>>\n\n>>>>>>>>>>Connected to the database successfully!")
+    # curr = connection.cursor()
+    # "dbname=postgres user=postgres password=123456 host=localhost port=5432"
+
+    # install pgvector
+    # curr.execute("CREATE EXTENSION IF NOT EXISTS vector;")
+    # connection.commit()
+    # Close the cursor and connection
+    # curr.close()
+    # connection.close()
+    # Connect to and configure your vector database
+    # Register the vector type with psycopg2
+
+    # register_vector(connection)
+    """
+    REF:
+    https://www.timescale.com/blog/postgresql-as-a-vector-database-create-store-and-query-openai-embeddings-with-pgvector/
+    https://www.timescale.com/blog/how-to-build-llm-applications-with-pgvector-vector-store-in-langchain/
+    Once we've connected to the database, let's create a table that we'll use to store embeddings along with metadata.
+    Our table will look as follows:
+
+    id | title | url | content | tokens | embedding
+
+    id:        the unique ID of each vector embedding in the table.
+    title:     the blog title from which the content associated with the embedding is taken.
+    url:       the blog URL from which the content associated with the embedding is taken.
+    content:   the actual blog content associated with the embedding.
+    tokens:    the number of tokens the embedding represents.
+    embedding: the vector representation of the content.
+ """ + # Create table to store embeddings and metadata + # table_create_command = """ + # CREATE TABLE embeddings ( + + # id bigserial primary key, + # title text, + # url text, + # content text, + # tokens integer, + # embedding vector(1536) + + # ); + # """ + + # curr.execute(table_create_command) + # curr.close() + # connection.commit() + # register_vector(connection) + # connection.commit() + # connection.execute('DROP TABLE IF EXISTS documents') + # connection.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(384))') + # curr = connection.cursor() + + # for content, embedding in zip(input, embeddings): + # connection.execute('INSERT INTO embeddings (content, embedding) VALUES (%s, %s)', (content, embedding)) + + # document_id = 1 + # db = connection.execute('SELECT content FROM documents WHERE id != %(id)s ORDER BY embedding <=> (SELECT embedding FROM documents WHERE id = %(id)s) LIMIT 5', {'id': document_id}).fetchall() + # for neighbor in db: + # print(neighbor[0]) + + # uri = 'postgresql+psycopg://postgres:123456@localhost:5432/postgres' + # dbUri= uri + collection_name = "PG_VECTOR_SAudi" + # db = SQLDatabase.from_uri(connection) + connection = "postgresql+psycopg://postgres:123456@localhost:5432/postgres" # + + db = PGVector( #.from_existing_index( + embeddings=embeddings, + collection_name=collection_name, + connection=connection, + use_jsonb=True, + ) # load the vectorstore - db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings, client_settings=CHROMA_SETTINGS) - retriever = db.as_retriever() + # db = Chroma(persist_directory=PERSIST_DIRECTORY, + # embedding_function=embeddings, + # client_settings=CHROMA_SETTINGS + # ) + + + # print(embeddings) + + + + + + # Initialize the FAISS index + # faiss_index = faiss.IndexFlatL2(768) + + # # # Initialize the docstore + # docstore = InMemoryDocstore() + # # # Initialize the index_to_docstore_id + # index_to_docstore_id = {} + # # Add the embeddings to the index + # faiss_index.add(embeddings) + # Loading the saved embeddings + # db =FAISS.load_local("DB/faiss", embeddings, allow_dangerous_deserialization=True) + # db = FAISS( + # embedding_function=embeddings, + # index=faiss_index, + # # docstore=docstore, + # # index_to_docstore_id=index_to_docstore_id + # ) + + # # Add documents and their embeddings to the FAISS index and the docstore + # for i, (text, embedding) in enumerate(zip(df['Text'].tolist(), embeddings)): + # db.add_document(doc_id=i, text=text, embedding=embedding) + retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":2})#search_kwargs={'k':2} # get the prompt template and memory if set by the user. - prompt, memory = get_prompt_template(promptTemplate_type=promptTemplate_type, history=use_history) + prompt, memory = get_prompt_template(promptTemplate_type=promptTemplate_type, + history=use_history) - # load the llm pipeline - llm = load_model(device_type, model_id=MODEL_ID, model_basename=MODEL_BASENAME, LOGGING=logging) + + # # Ensure the model is on CPU + # device = torch.device("cpu") + # llm.to(device) if use_history: qa = RetrievalQA.from_chain_type( llm=llm, chain_type="stuff", # try other chains types as well. 
diff --git a/run_localGPT_API.py b/run_localGPT_API.py
index b345612f..fff8d16d 100644
--- a/run_localGPT_API.py
+++ b/run_localGPT_API.py
@@ -7,14 +7,14 @@
 import torch
 from flask import Flask, jsonify, request
 from langchain.chains import RetrievalQA
-from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
 
-# from langchain.embeddings import HuggingFaceEmbeddings
+# from langchain_community.embeddings import HuggingFaceEmbeddings
 from run_localGPT import load_model
 from prompt_template_utils import get_prompt_template
 
 # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain.vectorstores import Chroma
+from langchain_community.vectorstores import Chroma
 from werkzeug.utils import secure_filename
 
 from constants import CHROMA_SETTINGS, EMBEDDING_MODEL_NAME, PERSIST_DIRECTORY, MODEL_ID, MODEL_BASENAME
diff --git a/utils.py b/utils.py
index 4e9fd820..b0e7fd91 100644
--- a/utils.py
+++ b/utils.py
@@ -2,9 +2,9 @@
 import csv
 from datetime import datetime
 from constants import EMBEDDING_MODEL_NAME
-from langchain.embeddings import HuggingFaceInstructEmbeddings
-from langchain.embeddings import HuggingFaceBgeEmbeddings
-from langchain.embeddings import HuggingFaceEmbeddings
+from langchain_community.embeddings import HuggingFaceInstructEmbeddings
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings
 
 
 def log_to_csv(question, answer):
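Since `utils.py` now pulls its embedding classes from `langchain_community`, a quick smoke test of the default instructor model confirms the migrated imports still load and report their vector size. A sketch, not part of the diff, assuming the `InstructorEmbedding` / `sentence-transformers` dependencies are installed.

```python
# Smoke test (not part of the diff): the embedding classes now come from langchain_community.
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

from constants import EMBEDDING_MODEL_NAME

embeddings = HuggingFaceInstructEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,   # "hkunlp/instructor-large"
    model_kwargs={"device": "cpu"},
)
vector = embeddings.embed_query("pgvector migration smoke test")
print(f"Embedding dimension: {len(vector)}")  # 768 for instructor-large
```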