From 3e1b9f32e8378896a60d71d6b410adbe07860827 Mon Sep 17 00:00:00 2001
From: Bujie Xu <bujie.x.xu@gsk.com>
Date: Fri, 7 Apr 2023 13:00:08 +0900
Subject: [PATCH] =?UTF-8?q?=F0=9F=86=95=20chore(.pre-commit-config.yaml):?=
 =?UTF-8?q?=20add=20pre-commit=20hooks=20for=20trailing=20whitespace,=20en?=
 =?UTF-8?q?d-of-file,=20yaml,=20large=20files,=20merge=20conflicts,=20debu?=
 =?UTF-8?q?g=20statements,=20requirements.txt,=20black,=20isort,=20and=20f?=
 =?UTF-8?q?lake8.=20Set=20max=20line=20length=20to=20140=20for=20flake8.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🎨 style(babyagi.py): format code with black
🔥 chore(babyagi.py): remove unused imports and variables
🎨 style(babyagi.py): reformat the GPT-4 cost-warning print and drop its redundant f-string prefix
🐛 fix(babyagi.py): fix formatting and typos in print statements
🎨 style(babyagi.py): normalize comment spacing, quotes, and trailing whitespace in context_agent function
👌 refactor(babyagi.py): refactor task_creation_agent function for readability
👌 refactor(babyagi.py): refactor prioritization_agent function for readability
👌 refactor(babyagi.py): refactor execution_agent function for readability
👌 refactor(babyagi.py): refactor context_agent function for readability
📝 docs(babyagi.py): normalize existing comment formatting (add a space after `#`)

🎨 style(babyagi.py): fix indentation and add line breaks for readability

🔧 chore(requirements.txt): add pre-commit dependency to ensure code quality and consistency
---
 .pre-commit-config.yaml |  28 +++++++++
 babyagi.py              | 136 +++++++++++++++++++++++++++-------------
 requirements.txt        |   3 +-
 3 files changed, 123 insertions(+), 44 deletions(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..40eb5822
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,28 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+      - id: check-merge-conflict
+      - id: debug-statements
+      - id: requirements-txt-fixer
+        files: requirements.txt
+
+  - repo: https://github.com/psf/black
+    rev: 23.3.0
+    hooks:
+      - id: black
+
+  - repo: https://github.com/pycqa/isort
+    rev: 5.11.5
+    hooks:
+      - id: isort
+
+  - repo: https://github.com/pycqa/flake8
+    rev: 6.0.0
+    hooks:
+      - id: flake8
+        args: ["--max-line-length=140"]
diff --git a/babyagi.py b/babyagi.py
index e5e04c8e..c34bb9e5 100755
--- a/babyagi.py
+++ b/babyagi.py
@@ -1,13 +1,13 @@
 #!/usr/bin/env python3
 import os
-import openai
-import pinecone
-import time
 import sys
+import time
 from collections import deque
 from typing import Dict, List
+
+import openai
+import pinecone
 from dotenv import load_dotenv
-import os
 
 # Set Variables
 load_dotenv()
@@ -20,13 +20,19 @@
 assert OPENAI_API_MODEL, "OPENAI_API_MODEL environment variable is missing from .env"
 
 if "gpt-4" in OPENAI_API_MODEL.lower():
-    print(f"\033[91m\033[1m"+"\n*****USING GPT-4. POTENTIALLY EXPENSIVE. MONITOR YOUR COSTS*****"+"\033[0m\033[0m")
+    print(
+        "\033[91m\033[1m"
+        + "\n*****USING GPT-4. POTENTIALLY EXPENSIVE. MONITOR YOUR COSTS*****"
+        + "\033[0m\033[0m"
+    )
 
 PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
 assert PINECONE_API_KEY, "PINECONE_API_KEY environment variable is missing from .env"
 
 PINECONE_ENVIRONMENT = os.getenv("PINECONE_ENVIRONMENT", "us-east1-gcp")
-assert PINECONE_ENVIRONMENT, "PINECONE_ENVIRONMENT environment variable is missing from .env"
+assert (
+    PINECONE_ENVIRONMENT
+), "PINECONE_ENVIRONMENT environment variable is missing from .env"
 
 # Table config
 YOUR_TABLE_NAME = os.getenv("TABLE_NAME", "")
@@ -39,8 +45,8 @@
 YOUR_FIRST_TASK = os.getenv("FIRST_TASK", "")
 assert YOUR_FIRST_TASK, "FIRST_TASK environment variable is missing from .env"
 
-#Print OBJECTIVE
-print("\033[96m\033[1m"+"\n*****OBJECTIVE*****\n"+"\033[0m\033[0m")
+# Print OBJECTIVE
+print("\033[96m\033[1m" + "\n*****OBJECTIVE*****\n" + "\033[0m\033[0m")
 print(OBJECTIVE)
 
 # Configure OpenAI and Pinecone
@@ -53,7 +59,9 @@
 metric = "cosine"
 pod_type = "p1"
 if table_name not in pinecone.list_indexes():
-    pinecone.create_index(table_name, dimension=dimension, metric=metric, pod_type=pod_type)
+    pinecone.create_index(
+        table_name, dimension=dimension, metric=metric, pod_type=pod_type
+    )
 
 # Connect to the index
 index = pinecone.Index(table_name)
@@ -61,15 +69,25 @@
 # Task list
 task_list = deque([])
 
+
 def add_task(task: Dict):
     task_list.append(task)
 
+
 def get_ada_embedding(text):
     text = text.replace("\n", " ")
-    return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"]
-
-def openai_call(prompt: str, model: str = OPENAI_API_MODEL, temperature: float = 0.5, max_tokens: int = 100):
-    if not model.startswith('gpt-'):
+    return openai.Embedding.create(input=[text], model="text-embedding-ada-002")[
+        "data"
+    ][0]["embedding"]
+
+
+def openai_call(
+    prompt: str,
+    model: str = OPENAI_API_MODEL,
+    temperature: float = 0.5,
+    max_tokens: int = 100,
+):
+    if not model.startswith("gpt-"):
         # Use completion API
         response = openai.Completion.create(
             engine=model,
@@ -78,12 +96,12 @@ def openai_call(prompt: str, model: str = OPENAI_API_MODEL, temperature: float =
             max_tokens=max_tokens,
             top_p=1,
             frequency_penalty=0,
-            presence_penalty=0
+            presence_penalty=0,
         )
         return response.choices[0].text.strip()
     else:
         # Use chat completion API
-        messages=[{"role": "user", "content": prompt}]
+        messages = [{"role": "user", "content": prompt}]
         response = openai.ChatCompletion.create(
             model=model,
             messages=messages,
@@ -94,22 +112,34 @@ def openai_call(prompt: str, model: str = OPENAI_API_MODEL, temperature: float =
         )
         return response.choices[0].message.content.strip()
 
-def task_creation_agent(objective: str, result: Dict, task_description: str, task_list: List[str]):
-    prompt = f"You are an task creation AI that uses the result of an execution agent to create new tasks with the following objective: {objective}, The last completed task has the result: {result}. This result was based on this task description: {task_description}. These are incomplete tasks: {', '.join(task_list)}. Based on the result, create new tasks to be completed by the AI system that do not overlap with incomplete tasks. Return the tasks as an array."
+
+def task_creation_agent(
+    objective: str, result: Dict, task_description: str, task_list: List[str]
+):
+    prompt = f"""
+    You are an task creation AI that uses the result of an execution agent to create new tasks with the following objective: {objective},
+    The last completed task has the result: {result}.
+    This result was based on this task description: {task_description}. These are incomplete tasks: {', '.join(task_list)}.
+    Based on the result, create new tasks to be completed by the AI system that do not overlap with incomplete tasks.
+    Return the tasks as an array."""
     response = openai_call(prompt)
-    new_tasks = response.split('\n')
+    new_tasks = response.split("\n")
     return [{"task_name": task_name} for task_name in new_tasks]
 
+
 def prioritization_agent(this_task_id: int):
     global task_list
     task_names = [t["task_name"] for t in task_list]
-    next_task_id = int(this_task_id)+1
-    prompt = f"""You are an task prioritization AI tasked with cleaning the formatting of and reprioritizing the following tasks: {task_names}. Consider the ultimate objective of your team:{OBJECTIVE}. Do not remove any tasks. Return the result as a numbered list, like:
+    next_task_id = int(this_task_id) + 1
+    prompt = f"""
+    You are an task prioritization AI tasked with cleaning the formatting of and reprioritizing the following tasks: {task_names}.
+    Consider the ultimate objective of your team:{OBJECTIVE}.
+    Do not remove any tasks. Return the result as a numbered list, like:
     #. First task
     #. Second task
     Start the task list with number {next_task_id}."""
     response = openai_call(prompt)
-    new_tasks = response.split('\n')
+    new_tasks = response.split("\n")
     task_list = deque()
     for task_string in new_tasks:
         task_parts = task_string.strip().split(".", 1)
@@ -118,26 +148,29 @@ def prioritization_agent(this_task_id: int):
             task_name = task_parts[1].strip()
             task_list.append({"task_id": task_id, "task_name": task_name})
 
+
 def execution_agent(objective: str, task: str) -> str:
-    context=context_agent(query=objective, n=5)
-    #print("\n*******RELEVANT CONTEXT******\n")
-    #print(context)
-    prompt =f"You are an AI who performs one task based on the following objective: {objective}.\nTake into account these previously completed tasks: {context}\nYour task: {task}\nResponse:"
+    context = context_agent(query=objective, n=5)
+    # print("\n*******RELEVANT CONTEXT******\n")
+    # print(context)
+    prompt = f"""
+    You are an AI who performs one task based on the following objective: {objective}.
+    Take into account these previously completed tasks: {context}.
+    Your task: {task}\nResponse:"""
     return openai_call(prompt, temperature=0.7, max_tokens=2000)
 
+
 def context_agent(query: str, n: int):
     query_embedding = get_ada_embedding(query)
     results = index.query(query_embedding, top_k=n, include_metadata=True)
-    #print("***** RESULTS *****")
-    #print(results)
-    sorted_results = sorted(results.matches, key=lambda x: x.score, reverse=True)    
-    return [(str(item.metadata['task'])) for item in sorted_results]
+    # print("***** RESULTS *****")
+    # print(results)
+    sorted_results = sorted(results.matches, key=lambda x: x.score, reverse=True)
+    return [(str(item.metadata["task"])) for item in sorted_results]
+
 
 # Add the first task
-first_task = {
-    "task_id": 1,
-    "task_name": YOUR_FIRST_TASK
-}
+first_task = {"task_id": 1, "task_name": YOUR_FIRST_TASK}
 
 add_task(first_task)
 # Main loop
@@ -145,29 +178,46 @@ def context_agent(query: str, n: int):
 while True:
     if task_list:
         # Print the task list
-        print("\033[95m\033[1m"+"\n*****TASK LIST*****\n"+"\033[0m\033[0m")
+        print("\033[95m\033[1m" + "\n*****TASK LIST*****\n" + "\033[0m\033[0m")
         for t in task_list:
-            print(str(t['task_id'])+": "+t['task_name'])
+            print(str(t["task_id"]) + ": " + t["task_name"])
 
         # Step 1: Pull the first task
         task = task_list.popleft()
-        print("\033[92m\033[1m"+"\n*****NEXT TASK*****\n"+"\033[0m\033[0m")
-        print(str(task['task_id'])+": "+task['task_name'])
+        print("\033[92m\033[1m" + "\n*****NEXT TASK*****\n" + "\033[0m\033[0m")
+        print(str(task["task_id"]) + ": " + task["task_name"])
 
         # Send to execution function to complete the task based on the context
-        result = execution_agent(OBJECTIVE,task["task_name"])
+        result = execution_agent(OBJECTIVE, task["task_name"])
         this_task_id = int(task["task_id"])
-        print("\033[93m\033[1m"+"\n*****TASK RESULT*****\n"+"\033[0m\033[0m")
+        print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m")
         print(result)
 
         # Step 2: Enrich result and store in Pinecone
-        enriched_result = {'data': result}  # This is where you should enrich the result if needed
+        enriched_result = {
+            "data": result
+        }  # This is where you should enrich the result if needed
         result_id = f"result_{task['task_id']}"
-        vector = enriched_result['data']  # extract the actual result from the dictionary
-        index.upsert([(result_id, get_ada_embedding(vector),{"task":task['task_name'],"result":result})])
+        vector = enriched_result[
+            "data"
+        ]  # extract the actual result from the dictionary
+        index.upsert(
+            [
+                (
+                    result_id,
+                    get_ada_embedding(vector),
+                    {"task": task["task_name"], "result": result},
+                )
+            ]
+        )
 
     # Step 3: Create new tasks and reprioritize task list
-    new_tasks = task_creation_agent(OBJECTIVE,enriched_result, task["task_name"], [t["task_name"] for t in task_list])
+    new_tasks = task_creation_agent(
+        OBJECTIVE,
+        enriched_result,
+        task["task_name"],
+        [t["task_name"] for t in task_list],
+    )
 
     for new_task in new_tasks:
         task_id_counter += 1
diff --git a/requirements.txt b/requirements.txt
index 35ab7743..d6531bcc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 openai==0.27.2
 pinecone-client==2.2.1
-python-dotenv==1.0.0
\ No newline at end of file
+pre-commit>=3.2.0
+python-dotenv==1.0.0