This repository was archived by the owner on Feb 11, 2025. It is now read-only.

Llama #84

Open · wants to merge 10 commits into main
2 changes: 1 addition & 1 deletion api/Dockerfile

@@ -12,7 +12,7 @@ RUN mkdir -p $FOLDER
 
 # Install packages
 COPY ./requirements.txt $FOLDER/requirements.txt
-RUN pip install -r $FOLDER/requirements.txt
+RUN pip install --no-cache-dir -r $FOLDER/requirements.txt
 
 # Copy the project files into the container
 COPY ./src $FOLDER/src
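The switch to `pip install --no-cache-dir` keeps pip's download cache out of the image layer, shrinking the final image; the installed packages behave the same.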
4 changes: 3 additions & 1 deletion api/requirements.txt

@@ -7,4 +7,6 @@ retry==0.9.2
 tiktoken==0.4.0
 python-dotenv==1.0.0
 websockets===11.0.3
-gunicorn===20.1.0
+gunicorn===20.1.0
+transformers
+torch==2.3.0
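transformers and torch back the new Llama2 code paths below. Two things a reviewer may want to check: transformers is left unpinned, so resolved versions can drift between builds, and the new embedding wrapper imports sentence-transformers, which is not added here.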
39 changes: 29 additions & 10 deletions api/src/embedding/openai.py

@@ -1,19 +1,38 @@
-import openai
-from embedding.base_embedding import BaseEmbedding
+# import openai
+# from embedding.base_embedding import BaseEmbedding
 
 
-class OpenAIEmbedding(BaseEmbedding):
-    """Wrapper around OpenAI embedding models."""
+# class OpenAIEmbedding(BaseEmbedding):
+#     """Wrapper around OpenAI embedding models."""
+
+#     def __init__(
+#         self, openai_api_key: str, model_name: str = "text-embedding-ada-002"
+#     ) -> None:
+#         openai.api_key = openai_api_key
+#         self.model = model_name
+
+#     def generate(
+#         self,
+#         input: str,
+#     ) -> str:
+#         embedding = openai.Embedding.create(input=input, model=self.model)
+#         return embedding["data"][0]["embedding"]
+
+from sentence_transformers import SentenceTransformer
+from embedding.base_embedding import BaseEmbedding
+
+
+class LlamaEmbedding(BaseEmbedding):
+    """Wrapper around HuggingFace embedding models."""
 
     def __init__(
-        self, openai_api_key: str, model_name: str = "text-embedding-ada-002"
+        self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
     ) -> None:
-        openai.api_key = openai_api_key
-        self.model = model_name
+        self.model = SentenceTransformer(model_name)
 
     def generate(
         self,
         input: str,
-    ) -> str:
-        embedding = openai.Embedding.create(input=input, model=self.model)
-        return embedding["data"][0]["embedding"]
+    ) -> list:
+        embedding = self.model.encode(input)
+        return embedding
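For review purposes, a minimal smoke test of the new wrapper could look like the following (hypothetical snippet: it assumes sentence-transformers is installed and api/src is on PYTHONPATH; the model weights are downloaded from the HuggingFace Hub on first use):

from embedding.openai import LlamaEmbedding

# Defaults to sentence-transformers/all-MiniLM-L6-v2.
embedder = LlamaEmbedding()

# encode() returns a numpy array, so the declared "-> list" return type
# is loose; callers may want .tolist() before JSON-serializing.
vector = embedder.generate("What is a knowledge graph?")
print(vector.shape)  # all-MiniLM-L6-v2 yields 384-dimensional embeddings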
162 changes: 117 additions & 45 deletions api/src/llm/openai.py

@@ -1,26 +1,109 @@
+# from typing import (
+#     Callable,
+#     List,
+# )
+
+# import openai
+# import tiktoken
+from llm.basellm import BaseLLM
+# from retry import retry
+
+
+# class OpenAIChat(BaseLLM):
+#     """Wrapper around OpenAI Chat large language models."""
+
+#     def __init__(
+#         self,
+#         openai_api_key: str,
+#         model_name: str = "gpt-3.5-turbo",
+#         max_tokens: int = 1000,
+#         temperature: float = 0.0,
+#     ) -> None:
+#         openai.api_key = openai_api_key
+#         self.model = model_name
+#         self.max_tokens = max_tokens
+#         self.temperature = temperature
+
+#     @retry(tries=3, delay=1)
+#     def generate(
+#         self,
+#         messages: List[str],
+#     ) -> str:
+#         try:
+#             completions = openai.ChatCompletion.create(
+#                 model=self.model,
+#                 temperature=self.temperature,
+#                 max_tokens=self.max_tokens,
+#                 messages=messages,
+#             )
+#             return completions.choices[0].message.content
+#         # catch context length / do not retry
+#         except openai.error.InvalidRequestError as e:
+#             return str(f"Error: {e}")
+#         # catch authorization errors / do not retry
+#         except openai.error.AuthenticationError as e:
+#             return "Error: The provided OpenAI API key is invalid"
+#         except Exception as e:
+#             print(f"Retrying LLM call {e}")
+#             raise Exception()
+
+#     async def generateStreaming(
+#         self,
+#         messages: List[str],
+#         onTokenCallback=Callable[[str], None],
+#     ) -> str:
+#         result = []
+#         completions = openai.ChatCompletion.create(
+#             model=self.model,
+#             temperature=self.temperature,
+#             max_tokens=self.max_tokens,
+#             messages=messages,
+#             stream=True,
+#         )
+#         result = []
+#         for message in completions:
+#             # Process the streamed messages or perform any other desired action
+#             delta = message["choices"][0]["delta"]
+#             if "content" in delta:
+#                 result.append(delta["content"])
+#             await onTokenCallback(message)
+#         return result
+
+#     def num_tokens_from_string(self, string: str) -> int:
+#         encoding = tiktoken.encoding_for_model(self.model)
+#         num_tokens = len(encoding.encode(string))
+#         return num_tokens
+
+#     def max_allowed_token_length(self) -> int:
+#         # TODO: list all models and their max tokens from api
+#         return 2049
+
 from typing import (
     Callable,
     List,
 )
 
 import openai
 import tiktoken
 from llm.basellm import BaseLLM
+# from transformers import LlamaForCausalLM, LlamaTokenizer
+import torch
+# from basellm import BaseLLM
 from retry import retry
 
+# Load model directly
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+# tokenizer = AutoTokenizer.from_pretrained("TheBloke/Llama-2-7B-32K-Instruct-GPTQ", trust_remote_code=True)
+# model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-32K-Instruct-GPTQ", trust_remote_code=True)
 
-class OpenAIChat(BaseLLM):
-    """Wrapper around OpenAI Chat large language models."""
+class Llama2Chat(BaseLLM):
+    """Wrapper around HuggingFace Llama2 large language models."""
 
     def __init__(
         self,
-        openai_api_key: str,
-        model_name: str = "gpt-3.5-turbo",
-        max_tokens: int = 1000,
+        model_name: str = "TheBloke/Llama-2-7B-32K-Instruct-GPTQ",
+        max_tokens: int = 2056,
         temperature: float = 0.0,
     ) -> None:
-        openai.api_key = openai_api_key
-        self.model = model_name
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
         self.max_tokens = max_tokens
         self.temperature = temperature
 
@@ -30,50 +113,39 @@ def generate(
         messages: List[str],
     ) -> str:
         try:
-            completions = openai.ChatCompletion.create(
-                model=self.model,
-                temperature=self.temperature,
-                max_tokens=self.max_tokens,
-                messages=messages,
-            )
-            return completions.choices[0].message.content
-        # catch context length / do not retry
-        except openai.error.InvalidRequestError as e:
-            return str(f"Error: {e}")
-        # catch authorization errors / do not retry
-        except openai.error.AuthenticationError as e:
-            return "Error: The provided OpenAI API key is invalid"
+            # Concatenate the messages into a single string
+            input_text = " ".join(messages)
+            inputs = self.tokenizer(input_text, return_tensors="pt", max_length=self.max_tokens, truncation=True)
+            outputs = self.model.generate(**inputs, max_length=self.max_tokens, temperature=self.temperature)
+            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         except Exception as e:
             print(f"Retrying LLM call {e}")
-            raise Exception()
+            raise Exception(f"Error: {e}")
 
     async def generateStreaming(
         self,
         messages: List[str],
         onTokenCallback=Callable[[str], None],
     ) -> str:
-        result = []
-        completions = openai.ChatCompletion.create(
-            model=self.model,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-            messages=messages,
-            stream=True,
-        )
-        result = []
-        for message in completions:
-            # Process the streamed messages or perform any other desired action
-            delta = message["choices"][0]["delta"]
-            if "content" in delta:
-                result.append(delta["content"])
-            await onTokenCallback(message)
-        return result
+        try:
+            input_text = " ".join(messages)
+            inputs = self.tokenizer(input_text, return_tensors="pt", max_length=self.max_tokens, truncation=True)
+            outputs = self.model.generate(**inputs, max_length=self.max_tokens, temperature=self.temperature)
+
+            result = []
+            for token_id in outputs[0]:
+                token = self.tokenizer.decode(token_id, skip_special_tokens=True)
+                result.append(token)
+                await onTokenCallback(token)
+            return result
+        except Exception as e:
+            print(f"Error during streaming generation: {e}")
+            raise Exception(f"Error: {e}")
 
     def num_tokens_from_string(self, string: str) -> int:
-        encoding = tiktoken.encoding_for_model(self.model)
-        num_tokens = len(encoding.encode(string))
-        return num_tokens
+        inputs = self.tokenizer(string, return_tensors="pt")
+        return inputs.input_ids.shape[1]
 
     def max_allowed_token_length(self) -> int:
-        # TODO: list all models and their max tokens from api
-        return 2049
+        return self.tokenizer.model_max_length
78 changes: 57 additions & 21 deletions api/src/main.py

@@ -17,7 +17,9 @@
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from fewshot_examples import get_fewshot_examples
-from llm.openai import OpenAIChat
+from llm.openai import Llama2Chat
+# from llm.openai import OpenAIChat
+
 from pydantic import BaseModel
@@ -79,11 +81,16 @@ async def questionProposalsForCurrentDb(payload: questionProposalPayload):
 
     questionProposalGenerator = QuestionProposalGenerator(
         database=neo4j_connection,
-        llm=OpenAIChat(
-            openai_api_key=api_key,
-            model_name="gpt-3.5-turbo-0613",
+        # llm=OpenAIChat(
+        #     openai_api_key=api_key,
+        #     model_name="gpt-3.5-turbo-0613",
+        #     max_tokens=512,
+        #     temperature=0.8,
+        llm=Llama2Chat(
+            # openai_api_key=api_key,
+            model_name="TheBloke/Llama-2-7B-32K-Instruct-GPTQ",
             max_tokens=512,
-            temperature=0.8,
+            temperature=0,
         ),
     )
@@ -128,17 +135,33 @@ async def onToken(token):
     )
     api_key = openai_api_key if openai_api_key else data.get("api_key")
 
-    default_llm = OpenAIChat(
-        openai_api_key=api_key,
-        model_name=data.get("model_name", "gpt-3.5-turbo-0613"),
+    # default_llm = OpenAIChat(
+    #     openai_api_key=api_key,
+    #     model_name=data.get("model_name", "gpt-3.5-turbo-0613"),
+    # )
+
+    default_llm = Llama2Chat(
+        # openai_api_key=api_key,
+        model_name=data.get("model_name", "TheBloke/Llama-2-7B-32K-Instruct-GPTQ"),
+        max_tokens=512,
+        temperature=0,
     )
+
+    # summarize_results = SummarizeCypherResult(
+    #     llm=OpenAIChat(
+    #         openai_api_key=api_key,
+    #         model_name="gpt-3.5-turbo-0613",
+    #         max_tokens=128,
+    #     )
+    # )
     summarize_results = SummarizeCypherResult(
-        llm=OpenAIChat(
-            openai_api_key=api_key,
-            model_name="gpt-3.5-turbo-0613",
-            max_tokens=128,
-        )
+        llm=Llama2Chat(
+            # openai_api_key=api_key,
+            model_name=data.get("model_name", "TheBloke/Llama-2-7B-32K-Instruct-GPTQ"),
+            max_tokens=128,
+            temperature=0,
+        )
     )
 
     text2cypher = Text2Cypher(
         database=neo4j_connection,
@@ -205,9 +228,16 @@ async def root(payload: ImportPayload):
     try:
         result = ""
 
-        llm = OpenAIChat(
-            openai_api_key=api_key, model_name="gpt-3.5-turbo-16k", max_tokens=4000
-        )
+        # llm = OpenAIChat(
+        #     openai_api_key=api_key, model_name="gpt-3.5-turbo-16k", max_tokens=4000
+        # )
+        llm = Llama2Chat(
+            # openai_api_key=api_key,
+            model_name="TheBloke/Llama-2-7B-32K-Instruct-GPTQ",
+            max_tokens=512,
+            temperature=0,
+        )
 
         if not payload.neo4j_schema:
             extractor = DataExtractor(llm=llm)
@@ -246,11 +276,17 @@ async def companyInformation(payload: companyReportPayload):
     )
     api_key = openai_api_key if openai_api_key else payload.api_key
 
-    llm = OpenAIChat(
-        openai_api_key=api_key,
-        model_name="gpt-3.5-turbo-16k-0613",
-        max_tokens=512,
-    )
+    # llm = OpenAIChat(
+    #     openai_api_key=api_key,
+    #     model_name="gpt-3.5-turbo-16k-0613",
+    #     max_tokens=512,
+    # )
+    llm = Llama2Chat(
+        model_name="TheBloke/Llama-2-7B-32K-Instruct-GPTQ",
+        max_tokens=512,
+        temperature=0,
+    )
 
     print("Running company report for " + payload.company)
     company_report = CompanyReport(neo4j_connection, payload.company, llm)
     result = company_report.run()
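Every call site keeps the old OpenAIChat construction as comments and drops the api_key wiring, so Llama2Chat is a drop-in at the constructor level; the request payload's model_name override still applies wherever data.get("model_name", ...) is used.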
7 changes: 6 additions & 1 deletion docker-compose.yml

@@ -1,4 +1,4 @@
-version: "3.7"
+version: "3.8"
 services:
   backend:
     build:
@@ -20,3 +20,8 @@ services:
     container_name: ui
     ports:
       - 4173:4173
+    volumes:
+      - .:/app
+      - /app/node_modules
+    environment:
+      - NODE_ENV=development
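Mounting the project directory over /app while declaring /app/node_modules as its own volume is the usual dev-container pattern: source changes on the host are picked up live, but the container keeps its own installed node_modules instead of having it shadowed by the host copy.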