-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconversation.py
142 lines (120 loc) · 4.38 KB
/
conversation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
from tqdm.notebook import trange, tqdm
from coder import Coder
from rl.llm_agent import LLMAgent
from rl.environment import Environment
from rl.code_evaluator import CodeEvaluator, CSV_PATH
from rl.policies import EpsilonGreedyPolicy
from rl.utils import compute_delta_grade, is_terminate_grade
# Load the CSV dataset once at import time; start_conversation() appends this
# text to the coder's first prompt.
with open(CSV_PATH, "r") as f:
    csv_data = f.read()
# Shared default environment for agents created via the factory helpers below;
# start_conversation() later rebinds every agent to a fresh Environment.
def_env = Environment()
# Module-level evaluator that grades the generated code (0-100 in reliability
# and clarity), reused across conversations.
evaluator = CodeEvaluator(
    environment=def_env,
    prompt="Evaluate the python code bellow: give grades from 0 to 100 in reliability and clarity. Briefly explain your grades.",
    name="Code Evaluator"
)
def create_coder(prompts_props: list[dict]) -> Coder:
    """Build a Coder bound to the module-level default environment.

    The coder is not an RL agent — it is a simple holder for the rewards
    earned by the initial prompt over the course of the conversation.
    """
    # def_env is only read here, so no `global` declaration is required.
    return Coder(prompts_props, def_env)
def create_reviewer(prompts: list[str]) -> LLMAgent:
    """Build the reviewer agent for evaluating the conversation.

    Parameters
    ----------
    prompts : list[str]
        Candidate prompts the reviewer agent can choose from.

    Returns
    -------
    LLMAgent
        A reviewer backed by the module-level default environment, using an
        epsilon-greedy policy (epsilon = 0.1) and an initial value of 100.
    """
    reviewer_policy = EpsilonGreedyPolicy(0.1)
    agent = LLMAgent(
        environment=def_env,
        prompts=prompts,
        initial_value=100,
        policy=reviewer_policy,
        name="Reviewer",
    )
    return agent
def create_refiner(prompts: list[str]) -> LLMAgent:
    """Build the refiner agent for improving the conversation's code.

    Parameters
    ----------
    prompts : list[str]
        Candidate prompts the refiner agent can choose from.

    Returns
    -------
    LLMAgent
        A refiner backed by the module-level default environment, using an
        epsilon-greedy policy (epsilon = 0.1) and an initial value of 100.
    """
    refiner_policy = EpsilonGreedyPolicy(0.1)
    agent = LLMAgent(
        environment=def_env,
        prompts=prompts,
        initial_value=100,
        policy=refiner_policy,
        name="Code Refiner",
    )
    return agent
def start_conversation(
    coder: Coder,
    coder_prompt_dict: dict,
    reviewer: LLMAgent,
    refiner: LLMAgent,
    max_turns: int = 5,
) -> tuple:
    """Run a conversation between the coder, reviewer, refiner and evaluator.

    Parameters
    ----------
    coder : Coder
        The coder agent that produces the initial code.
    coder_prompt_dict : dict
        The prompt dict for the coder; a copy is taken and the CSV file
        content is appended to its "prompt" entry, so the caller's dict is
        not mutated.
    reviewer : LLMAgent
        The reviewer agent that evaluates the conversation.
    refiner : LLMAgent
        The refiner agent that refines the conversation.
    max_turns : int, optional
        Maximum number of conversation turns, by default 5.

    Returns
    -------
    tuple
        ``(environment, reviewer_history, refiner_history, last_grade)``:
        the final Environment, the reviewer's and refiner's histories, and
        the last grade produced by the evaluator (``None`` only if
        ``max_turns`` is 0).  NOTE: the original annotation claimed
        ``Environment``, but the function has always returned this 4-tuple.
    """
    environment = Environment()
    # Rebind all agents (including the module-level evaluator) to a fresh
    # environment so consecutive runs do not share conversation state.
    for agent in [coder, reviewer, refiner, evaluator]:
        agent.environment = environment
    # Copy before mutating, then append the CSV content to the first prompt.
    coder_prompt_dict = coder_prompt_dict.copy()
    coder_prompt_dict["prompt"] += f"\n\nFile content:\n\n{csv_data}"
    coder.add_message(coder_prompt_dict)
    # Start the conversation
    last_grade = None
    last_code_content = None
    for turn in tqdm(range(max_turns), desc="Conv. turns", position=1, leave=False):
        # Evaluate the current code and status of the CSV.
        grade = evaluator.evaluate_code()
        if is_terminate_grade(grade):
            # Record the terminating grade before breaking; previously the
            # break skipped `last_grade = grade`, so final_reward received a
            # stale grade (or None when terminating on the very first turn).
            last_grade = grade
            break
        elif last_grade is None:
            # First turn: reward the coder for the initial code.
            coder.first_reward(grade)
            last_code_content = environment.get_last_message("Coder")["content"]
        else:
            # Later turns: reward refiner and reviewer by the grade delta.
            last_code_content = environment.get_last_message("Code Refiner")["content"]
            delta_grade = compute_delta_grade(last_grade, grade)
            refiner.reward(delta_grade)
            reviewer.reward(delta_grade)
        # Review, then refine, the latest code content.
        last_code_content = reviewer.add_message(last_code_content)
        refiner.add_message(last_code_content)
        last_grade = grade
    # Reward the coder with the latest grade observed.
    coder.final_reward(last_grade)
    return environment, reviewer.get_history(), refiner.get_history(), last_grade