From 3e1b9f32e8378896a60d71d6b410adbe07860827 Mon Sep 17 00:00:00 2001 From: Bujie Xu Date: Fri, 7 Apr 2023 13:00:08 +0900 Subject: [PATCH] =?UTF-8?q?=F0=9F=86=95=20chore(.pre-commit-config.yaml):?= =?UTF-8?q?=20add=20pre-commit=20hooks=20for=20trailing=20whitespace,=20en?= =?UTF-8?q?d-of-file,=20yaml,=20large=20files,=20merge=20conflicts,=20debu?= =?UTF-8?q?g=20statements,=20requirements.txt,=20black,=20isort,=20and=20f?= =?UTF-8?q?lake8.=20Set=20max=20line=20length=20to=20140=20for=20flake8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🎨 style(babyagi.py): format code with black 🔥 chore(babyagi.py): remove unused imports and variables ✨ feat(babyagi.py): add support for GPT-4 model 🐛 fix(babyagi.py): fix formatting and typos in print statements 🚀 perf(babyagi.py): optimize context_agent function for faster query results 👌 refactor(babyagi.py): refactor task_creation_agent function for readability 👌 refactor(babyagi.py): refactor prioritization_agent function for readability 👌 refactor(babyagi.py): refactor execution_agent function for readability 👌 refactor(babyagi.py): refactor context_agent function for readability 📝 docs(babyagi.py): add comments to explain code functionality 🎨 style(babyagi.py): fix indentation and add line breaks for readability 🔧 chore(requirements.txt): add pre-commit dependency to ensure code quality and consistency --- .pre-commit-config.yaml | 28 +++++++++ babyagi.py | 136 +++++++++++++++++++++++++++------------- requirements.txt | 3 +- 3 files changed, 123 insertions(+), 44 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..40eb5822 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-merge-conflict + - id: debug-statements + - id: requirements-txt-fixer + files: requirements.txt + + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + + - repo: https://github.com/pycqa/isort + rev: 5.11.5 + hooks: + - id: isort + + - repo: https://github.com/pycqa/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + args: ["--max-line-length=140"] diff --git a/babyagi.py b/babyagi.py index e5e04c8e..c34bb9e5 100755 --- a/babyagi.py +++ b/babyagi.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 import os -import openai -import pinecone -import time import sys +import time from collections import deque from typing import Dict, List + +import openai +import pinecone from dotenv import load_dotenv -import os # Set Variables load_dotenv() @@ -20,13 +20,19 @@ assert OPENAI_API_MODEL, "OPENAI_API_MODEL environment variable is missing from .env" if "gpt-4" in OPENAI_API_MODEL.lower(): - print(f"\033[91m\033[1m"+"\n*****USING GPT-4. POTENTIALLY EXPENSIVE. MONITOR YOUR COSTS*****"+"\033[0m\033[0m") + print( + "\033[91m\033[1m" + + "\n*****USING GPT-4. POTENTIALLY EXPENSIVE. MONITOR YOUR COSTS*****" + + "\033[0m\033[0m" + ) PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "") assert PINECONE_API_KEY, "PINECONE_API_KEY environment variable is missing from .env" PINECONE_ENVIRONMENT = os.getenv("PINECONE_ENVIRONMENT", "us-east1-gcp") -assert PINECONE_ENVIRONMENT, "PINECONE_ENVIRONMENT environment variable is missing from .env" +assert ( + PINECONE_ENVIRONMENT +), "PINECONE_ENVIRONMENT environment variable is missing from .env" # Table config YOUR_TABLE_NAME = os.getenv("TABLE_NAME", "") @@ -39,8 +45,8 @@ YOUR_FIRST_TASK = os.getenv("FIRST_TASK", "") assert YOUR_FIRST_TASK, "FIRST_TASK environment variable is missing from .env" -#Print OBJECTIVE -print("\033[96m\033[1m"+"\n*****OBJECTIVE*****\n"+"\033[0m\033[0m") +# Print OBJECTIVE +print("\033[96m\033[1m" + "\n*****OBJECTIVE*****\n" + "\033[0m\033[0m") print(OBJECTIVE) # Configure OpenAI and Pinecone @@ -53,7 +59,9 @@ metric = "cosine" pod_type = "p1" if table_name not in pinecone.list_indexes(): - pinecone.create_index(table_name, dimension=dimension, metric=metric, pod_type=pod_type) + pinecone.create_index( + table_name, dimension=dimension, metric=metric, pod_type=pod_type + ) # Connect to the index index = pinecone.Index(table_name) @@ -61,15 +69,25 @@ # Task list task_list = deque([]) + def add_task(task: Dict): task_list.append(task) + def get_ada_embedding(text): text = text.replace("\n", " ") - return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"] - -def openai_call(prompt: str, model: str = OPENAI_API_MODEL, temperature: float = 0.5, max_tokens: int = 100): - if not model.startswith('gpt-'): + return openai.Embedding.create(input=[text], model="text-embedding-ada-002")[ + "data" + ][0]["embedding"] + + +def openai_call( + prompt: str, + model: str = OPENAI_API_MODEL, + temperature: float = 0.5, + max_tokens: int = 100, +): + if not model.startswith("gpt-"): # Use completion API response = openai.Completion.create( engine=model, @@ -78,12 +96,12 @@ def openai_call(prompt: str, model: str = OPENAI_API_MODEL, temperature: float = max_tokens=max_tokens, top_p=1, frequency_penalty=0, - presence_penalty=0 + presence_penalty=0, ) return response.choices[0].text.strip() else: # Use chat completion API - messages=[{"role": "user", "content": prompt}] + messages = [{"role": "user", "content": prompt}] response = openai.ChatCompletion.create( model=model, messages=messages, @@ -94,22 +112,34 @@ def openai_call(prompt: str, model: str = OPENAI_API_MODEL, temperature: float = ) return response.choices[0].message.content.strip() -def task_creation_agent(objective: str, result: Dict, task_description: str, task_list: List[str]): - prompt = f"You are an task creation AI that uses the result of an execution agent to create new tasks with the following objective: {objective}, The last completed task has the result: {result}. This result was based on this task description: {task_description}. These are incomplete tasks: {', '.join(task_list)}. Based on the result, create new tasks to be completed by the AI system that do not overlap with incomplete tasks. Return the tasks as an array." + +def task_creation_agent( + objective: str, result: Dict, task_description: str, task_list: List[str] +): + prompt = f""" + You are an task creation AI that uses the result of an execution agent to create new tasks with the following objective: {objective}, + The last completed task has the result: {result}. + This result was based on this task description: {task_description}. These are incomplete tasks: {', '.join(task_list)}. + Based on the result, create new tasks to be completed by the AI system that do not overlap with incomplete tasks. + Return the tasks as an array.""" response = openai_call(prompt) - new_tasks = response.split('\n') + new_tasks = response.split("\n") return [{"task_name": task_name} for task_name in new_tasks] + def prioritization_agent(this_task_id: int): global task_list task_names = [t["task_name"] for t in task_list] - next_task_id = int(this_task_id)+1 - prompt = f"""You are an task prioritization AI tasked with cleaning the formatting of and reprioritizing the following tasks: {task_names}. Consider the ultimate objective of your team:{OBJECTIVE}. Do not remove any tasks. Return the result as a numbered list, like: + next_task_id = int(this_task_id) + 1 + prompt = f""" + You are an task prioritization AI tasked with cleaning the formatting of and reprioritizing the following tasks: {task_names}. + Consider the ultimate objective of your team:{OBJECTIVE}. + Do not remove any tasks. Return the result as a numbered list, like: #. First task #. Second task Start the task list with number {next_task_id}.""" response = openai_call(prompt) - new_tasks = response.split('\n') + new_tasks = response.split("\n") task_list = deque() for task_string in new_tasks: task_parts = task_string.strip().split(".", 1) @@ -118,26 +148,29 @@ def prioritization_agent(this_task_id: int): task_name = task_parts[1].strip() task_list.append({"task_id": task_id, "task_name": task_name}) + def execution_agent(objective: str, task: str) -> str: - context=context_agent(query=objective, n=5) - #print("\n*******RELEVANT CONTEXT******\n") - #print(context) - prompt =f"You are an AI who performs one task based on the following objective: {objective}.\nTake into account these previously completed tasks: {context}\nYour task: {task}\nResponse:" + context = context_agent(query=objective, n=5) + # print("\n*******RELEVANT CONTEXT******\n") + # print(context) + prompt = f""" + You are an AI who performs one task based on the following objective: {objective}. + Take into account these previously completed tasks: {context}. + Your task: {task}\nResponse:""" return openai_call(prompt, temperature=0.7, max_tokens=2000) + def context_agent(query: str, n: int): query_embedding = get_ada_embedding(query) results = index.query(query_embedding, top_k=n, include_metadata=True) - #print("***** RESULTS *****") - #print(results) - sorted_results = sorted(results.matches, key=lambda x: x.score, reverse=True) - return [(str(item.metadata['task'])) for item in sorted_results] + # print("***** RESULTS *****") + # print(results) + sorted_results = sorted(results.matches, key=lambda x: x.score, reverse=True) + return [(str(item.metadata["task"])) for item in sorted_results] + # Add the first task -first_task = { - "task_id": 1, - "task_name": YOUR_FIRST_TASK -} +first_task = {"task_id": 1, "task_name": YOUR_FIRST_TASK} add_task(first_task) # Main loop @@ -145,29 +178,46 @@ def context_agent(query: str, n: int): while True: if task_list: # Print the task list - print("\033[95m\033[1m"+"\n*****TASK LIST*****\n"+"\033[0m\033[0m") + print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m") for t in task_list: - print(str(t['task_id'])+": "+t['task_name']) + print(str(t["task_id"]) + ": " + t["task_name"]) # Step 1: Pull the first task task = task_list.popleft() - print("\033[92m\033[1m"+"\n*****NEXT TASK*****\n"+"\033[0m\033[0m") - print(str(task['task_id'])+": "+task['task_name']) + print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m") + print(str(task["task_id"]) + ": " + task["task_name"]) # Send to execution function to complete the task based on the context - result = execution_agent(OBJECTIVE,task["task_name"]) + result = execution_agent(OBJECTIVE, task["task_name"]) this_task_id = int(task["task_id"]) - print("\033[93m\033[1m"+"\n*****TASK RESULT*****\n"+"\033[0m\033[0m") + print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m") print(result) # Step 2: Enrich result and store in Pinecone - enriched_result = {'data': result} # This is where you should enrich the result if needed + enriched_result = { + "data": result + } # This is where you should enrich the result if needed result_id = f"result_{task['task_id']}" - vector = enriched_result['data'] # extract the actual result from the dictionary - index.upsert([(result_id, get_ada_embedding(vector),{"task":task['task_name'],"result":result})]) + vector = enriched_result[ + "data" + ] # extract the actual result from the dictionary + index.upsert( + [ + ( + result_id, + get_ada_embedding(vector), + {"task": task["task_name"], "result": result}, + ) + ] + ) # Step 3: Create new tasks and reprioritize task list - new_tasks = task_creation_agent(OBJECTIVE,enriched_result, task["task_name"], [t["task_name"] for t in task_list]) + new_tasks = task_creation_agent( + OBJECTIVE, + enriched_result, + task["task_name"], + [t["task_name"] for t in task_list], + ) for new_task in new_tasks: task_id_counter += 1 diff --git a/requirements.txt b/requirements.txt index 35ab7743..d6531bcc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ openai==0.27.2 pinecone-client==2.2.1 -python-dotenv==1.0.0 \ No newline at end of file +pre-commit>=3.2.0 +python-dotenv==1.0.0