Feat: chat name edit (QuivrHQ#343)
* feat(chat): add name update

* chore(linting): add flake8

* feat: add chat name edit
mamadoudicko authored Jun 20, 2023
1 parent 8ed8a2c commit e1a7404
Showing 25 changed files with 393 additions and 154 deletions.
4 changes: 4 additions & 0 deletions .flake8
@@ -0,0 +1,4 @@
[flake8]
; Minimal configuration for Flake8 to work with Black.
max-line-length = 100
ignore = E101,E111,E112,E221,E222,E501,E711,E712,W503,W504,F401
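
The configuration above only relaxes checks that conflict with Black's formatting. A minimal sketch of running the same linting step from Python via flake8's legacy scripting API; the target path and the inline ignore list (which mirrors the config) are illustrative only:

```python
# Sketch: run flake8 programmatically with the same relaxed rules as .flake8 above.
# The "backend" path is an assumption for illustration.
from flake8.api import legacy as flake8

style_guide = flake8.get_style_guide(
    max_line_length=100,
    ignore=[
        "E101", "E111", "E112", "E221", "E222", "E501",
        "E711", "E712", "W503", "W504", "F401",
    ],
)
report = style_guide.check_files(["backend"])
print(f"flake8 reported {report.total_errors} issue(s)")
```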
15 changes: 10 additions & 5 deletions .vscode/settings.json
@@ -1,10 +1,15 @@
{
  "[python]": {
    "editor.defaultFormatter": "ms-python.autopep8"
  },
  "python.formatting.provider": "black",
  "editor.codeActionsOnSave": {
    "source.organizeImports": true
    "source.organizeImports": true,
    "source.fixAll":true
  },
  "python.linting.enabled": true,
  "python.linting.flake8Enabled": true,
  "editor.formatOnSave": true,
  "[typescript]": {
    "editor.defaultFormatter": "esbenp.prettier-vscode",
    "editor.formatOnSave": true
  },
  "python.linting.enabled": true
  "editor.formatOnSaveMode": "modifications"
}
53 changes: 39 additions & 14 deletions backend/auth/api_key_handler.py
@@ -1,36 +1,61 @@

from datetime import datetime

from fastapi import HTTPException
from models.settings import CommonsDep
from pydantic import DateError


async def verify_api_key(api_key: str, commons: CommonsDep):
async def verify_api_key(api_key: str, commons: CommonsDep):
    try:
        # Use UTC time to avoid timezone issues
        current_date = datetime.utcnow().date()
        result = commons['supabase'].table('api_keys').select('api_key', 'creation_time').filter('api_key', 'eq', api_key).filter('is_active', 'eq', True).execute()
        result = (
            commons["supabase"]
            .table("api_keys")
            .select("api_key", "creation_time")
            .filter("api_key", "eq", api_key)
            .filter("is_active", "eq", True)
            .execute()
        )
        if result.data is not None and len(result.data) > 0:
            api_key_creation_date = datetime.strptime(result.data[0]['creation_time'], "%Y-%m-%dT%H:%M:%S").date()
            api_key_creation_date = datetime.strptime(
                result.data[0]["creation_time"], "%Y-%m-%dT%H:%M:%S"
            ).date()

            # Check if the API key was created today: Todo remove this check and use deleted_time instead.
            if api_key_creation_date == current_date:
                return True
        return False
    except DateError:
        return False



async def get_user_from_api_key(api_key: str, commons: CommonsDep):
    # Lookup the user_id from the api_keys table
    user_id_data = commons['supabase'].table('api_keys').select('user_id').filter('api_key', 'eq', api_key).execute()

    if not user_id_data.data:
        raise HTTPException(status_code=400, detail="Invalid API key.")

    user_id = user_id_data.data[0]['user_id']
    user_id_data = (
        commons["supabase"]
        .table("api_keys")
        .select("user_id")
        .filter("api_key", "eq", api_key)
        .execute()
    )

    if not user_id_data.data:
        raise HTTPException(status_code=400, detail="Invalid API key.")

    user_id = user_id_data.data[0]["user_id"]

    # Lookup the email from the users table. Todo: remove and use user_id for credentials
    user_email_data = commons['supabase'].table('users').select('email').filter('user_id', 'eq', user_id).execute()
    # Lookup the email from the users table. Todo: remove and use user_id for credentials
    user_email_data = (
        commons["supabase"]
        .table("users")
        .select("email")
        .filter("user_id", "eq", user_id)
        .execute()
    )

    return {'email': user_email_data.data[0]['email']} if user_email_data.data else {'email': None}
    return (
        {"email": user_email_data.data[0]["email"]}
        if user_email_data.data
        else {"email": None}
    )
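
For context, a minimal sketch of how these helpers could be exercised outside of a FastAPI request; the environment variable names, the import path, and the API key value are assumptions for illustration and are not part of this commit:

```python
# Sketch only: build the `commons` dict with a Supabase client and call the helpers above.
import asyncio
import os

from auth.api_key_handler import get_user_from_api_key, verify_api_key
from supabase import create_client


async def main():
    commons = {
        "supabase": create_client(
            os.environ["SUPABASE_URL"], os.environ["SUPABASE_SERVICE_KEY"]
        )
    }
    api_key = "some-api-key"  # hypothetical value
    if await verify_api_key(api_key, commons):
        user = await get_user_from_api_key(api_key, commons)
        print(user["email"])
    else:
        print("API key rejected")


asyncio.run(main())
```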
13 changes: 9 additions & 4 deletions backend/auth/auth_bearer.py
@@ -14,7 +14,9 @@ def __init__(self, auto_error: bool = True):
        super().__init__(auto_error=auto_error)

    async def __call__(self, request: Request, commons: CommonsDep):
        credentials: Optional[HTTPAuthorizationCredentials] = await super().__call__(request)
        credentials: Optional[HTTPAuthorizationCredentials] = await super().__call__(
            request
        )
        self.check_scheme(credentials)
        token = credentials.credentials
        return await self.authenticate(token, commons)
@@ -33,10 +35,13 @@ async def authenticate(self, token: str, commons: CommonsDep):
        elif await verify_api_key(token, commons):
            return await get_user_from_api_key(token, commons)
        else:
            raise HTTPException(status_code=402, detail="Invalid token or expired token.")
            raise HTTPException(
                status_code=402, detail="Invalid token or expired token."
            )

    def get_test_user(self):
        return {'email': '[email protected]'} # replace with test user information
        return {"email": "[email protected]"} # replace with test user information


def get_current_user(credentials: dict = Depends(AuthBearer())) -> User:
    return User(email=credentials.get('email', 'none'))
    return User(email=credentials.get("email", "none"))
14 changes: 10 additions & 4 deletions backend/auth/jwt_token_handler.py
@@ -8,6 +8,7 @@
SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
ALGORITHM = "HS256"


def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
    to_encode = data.copy()
    if expires_delta:
@@ -18,19 +19,24 @@ def create_access_token(data: dict, expires_delta: Optional[timedelta] = None):
    encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
    return encoded_jwt


def decode_access_token(token: str):
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False})
        payload = jwt.decode(
            token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False}
        )
        return payload
    except JWTError as e:
    except JWTError:
        return None



def verify_token(token: str):
    payload = decode_access_token(token)
    return payload is not None


def get_user_email_from_token(token: str):
    payload = decode_access_token(token)
    if payload:
        return payload.get("email")
    return "none"
    return "none"
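
These helpers wrap python-jose. A quick round-trip sketch of the same encode/decode flow; the secret and claims below are made up for illustration:

```python
# Sketch: JWT encode/decode round trip mirroring the helpers above (python-jose).
from datetime import datetime, timedelta

from jose import jwt

SECRET_KEY = "not-a-real-secret"  # hypothetical; the app reads JWT_SECRET_KEY from the env
ALGORITHM = "HS256"

claims = {"email": "user@example.com", "exp": datetime.utcnow() + timedelta(hours=1)}
token = jwt.encode(claims, SECRET_KEY, algorithm=ALGORITHM)

payload = jwt.decode(
    token, SECRET_KEY, algorithms=[ALGORITHM], options={"verify_aud": False}
)
print(payload["email"])  # -> user@example.com
```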
30 changes: 15 additions & 15 deletions backend/crawl/crawler.py
@@ -9,11 +9,11 @@


class CrawlWebsite(BaseModel):
    url : str
    js : bool = False
    depth : int = 1
    max_pages : int = 100
    max_time : int = 60
    url: str
    js: bool = False
    depth: int = 1
    max_pages: int = 100
    max_time: int = 60

    def _crawl(self, url):
        response = requests.get(url)
@@ -24,28 +24,28 @@ def _crawl(self, url):

    def process(self):
        content = self._crawl(self.url)
        ## Create a file

        # Create a file
        file_name = slugify(self.url) + ".html"
        temp_file_path = os.path.join(tempfile.gettempdir(), file_name)
        with open(temp_file_path, 'w') as temp_file:
        with open(temp_file_path, "w") as temp_file:
            temp_file.write(content)
        ## Process the file
        # Process the file

        if content:
            return temp_file_path, file_name
        else:
            return None

    def checkGithub(self):
        if "github.com" in self.url:
            return True
        else:
            return False



def slugify(text):
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8')
    text = re.sub(r'[^\w\s-]', '', text).strip().lower()
    text = re.sub(r'[-\s]+', '-', text)
    return text
    text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("utf-8")
    text = re.sub(r"[^\w\s-]", "", text).strip().lower()
    text = re.sub(r"[-\s]+", "-", text)
    return text
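
For reference, `slugify` drops everything except word characters, whitespace, and hyphens, then collapses whitespace and hyphen runs into single hyphens. A small standalone check, reproducing the function exactly as committed above:

```python
# Standalone check of slugify as defined above.
import re
import unicodedata


def slugify(text):
    text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("utf-8")
    text = re.sub(r"[^\w\s-]", "", text).strip().lower()
    text = re.sub(r"[-\s]+", "-", text)
    return text


print(slugify("https://example.com/Some Page!"))  # -> httpsexamplecomsome-page
```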
52 changes: 31 additions & 21 deletions backend/llm/summarization.py
@@ -8,11 +8,12 @@

openai_api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = openai_api_key
summary_llm = guidance.llms.OpenAI('gpt-3.5-turbo-0613', caching=False)
summary_llm = guidance.llms.OpenAI("gpt-3.5-turbo-0613", caching=False)


def llm_summerize(document):
    summary = guidance("""
    summary = guidance(
        """
{{#system~}}
You are a world best summarizer. \n
Condense the text, capturing essential points and core ideas. Include relevant \
@@ -28,21 +29,23 @@ def llm_summerize(document):
{{#assistant~}}
{{gen 'summarization' temperature=0.2 max_tokens=100}}
{{/assistant~}}
""", llm=summary_llm)
""",
        llm=summary_llm,
    )

    summary = summary(document=document)
    logger.info('Summarization: %s', summary)
    return summary['summarization']
    logger.info("Summarization: %s", summary)
    return summary["summarization"]


def llm_evaluate_summaries(question, summaries, model):
    if not model.startswith('gpt'):
        logger.info(
            f'Model {model} not supported. Using gpt-3.5-turbo instead.')
        model = 'gpt-3.5-turbo-0613'
    logger.info(f'Evaluating summaries with {model}')
    if not model.startswith("gpt"):
        logger.info(f"Model {model} not supported. Using gpt-3.5-turbo instead.")
        model = "gpt-3.5-turbo-0613"
    logger.info(f"Evaluating summaries with {model}")
    evaluation_llm = guidance.llms.OpenAI(model, caching=False)
    evaluation = guidance("""
    evaluation = guidance(
        """
{{#system~}}
You are a world best evaluator. You evaluate the relevance of summaries based \
on user input question. Return evaluation in following csv format, csv headers \
@@ -73,23 +76,30 @@ def llm_evaluate_summaries(question, summaries, model):
{{#assistant~}}
{{gen 'evaluation' temperature=0.2 stop='<|im_end|>'}}
{{/assistant~}}
""", llm=evaluation_llm)
""",
        llm=evaluation_llm,
    )
    result = evaluation(question=question, summaries=summaries)
    evaluations = {}
    for evaluation in result['evaluation'].split('\n'):
        if evaluation == '' or not evaluation[0].isdigit():
    for evaluation in result["evaluation"].split("\n"):
        if evaluation == "" or not evaluation[0].isdigit():
            continue
        logger.info('Evaluation Row: %s', evaluation)
        summary_id, document_id, score, *reason = evaluation.split(',')
        logger.info("Evaluation Row: %s", evaluation)
        summary_id, document_id, score, *reason = evaluation.split(",")
        if not score.isdigit():
            continue
        score = int(score)
        if score < 3 or score > 5:
            continue
        evaluations[summary_id] = {
            'evaluation': score,
            'reason': ','.join(reason),
            'summary_id': summary_id,
            'document_id': document_id,
            "evaluation": score,
            "reason": ",".join(reason),
            "summary_id": summary_id,
            "document_id": document_id,
        }
    return [e for e in sorted(evaluations.values(), key=lambda x: x['evaluation'], reverse=True)]
    return [
        e
        for e in sorted(
            evaluations.values(), key=lambda x: x["evaluation"], reverse=True
        )
    ]
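
The parsing loop above expects one CSV row per summary (`summary_id,document_id,score,reason`), keeps only rows whose score is a digit between 3 and 5, and sorts the results by score. A tiny illustration of that parsing with a made-up row:

```python
# Illustration only: parse one hypothetical evaluation row the way the loop above does.
row = "1,3f9c2b7a,4,directly answers the question"

summary_id, document_id, score, *reason = row.split(",")
if score.isdigit() and 3 <= int(score) <= 5:
    parsed = {
        "evaluation": int(score),
        "reason": ",".join(reason),
        "summary_id": summary_id,
        "document_id": document_id,
    }
    print(parsed)
```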
7 changes: 4 additions & 3 deletions backend/models/brains.py
@@ -11,12 +11,13 @@ class Brain(BaseModel):
    model: str = "gpt-3.5-turbo-0613"
    temperature: float = 0.0
    max_tokens: int = 256

class BrainToUpdate(BaseModel):


class BrainToUpdate(BaseModel):
    brain_id: UUID
    brain_name: Optional[str] = "New Brain"
    status: Optional[str] = "public"
    model: Optional[str] = "gpt-3.5-turbo-0613"
    temperature: Optional[float] = 0.0
    max_tokens: Optional[int] = 256
    file_sha1: Optional[str] = ''
    file_sha1: Optional[str] = ""
7 changes: 6 additions & 1 deletion backend/models/chats.py
@@ -12,4 +12,9 @@ class ChatMessage(BaseModel):
    temperature: float = 0.0
    max_tokens: int = 256
    use_summarization: bool = False
    chat_id: Optional[UUID] = None,
    chat_id: Optional[UUID] = None
    chat_name: Optional[str] = None


class ChatAttributes(BaseModel):
    chat_name: Optional[str] = None
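
The new `ChatAttributes` model carries the editable chat name used by this feature. The route that consumes it is part of this commit but not shown in this excerpt; the sketch below is a hedged guess at what such an endpoint could look like, and the route path, table, and column names are assumptions for illustration:

```python
# Sketch only: a FastAPI route that could use ChatAttributes to rename a chat.
# Route path, dependency wiring, and the "chats" table/column names are assumptions.
from uuid import UUID

from auth.auth_bearer import AuthBearer
from fastapi import APIRouter, Depends
from models.chats import ChatAttributes
from models.settings import CommonsDep

router = APIRouter()


@router.put("/chat/{chat_id}/metadata", dependencies=[Depends(AuthBearer())])
async def update_chat_attributes(
    chat_id: UUID, chat_attributes: ChatAttributes, commons: CommonsDep
):
    updates = {}
    if chat_attributes.chat_name is not None:
        updates["chat_name"] = chat_attributes.chat_name
    if updates:
        commons["supabase"].table("chats").update(updates).match(
            {"chat_id": str(chat_id)}
        ).execute()
    return {"chat_id": chat_id, **updates}
```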
2 changes: 1 addition & 1 deletion backend/models/users.py
@@ -1,5 +1,5 @@
from pydantic import BaseModel


class User (BaseModel):
class User(BaseModel):
    email: str