-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_sgpt.py
58 lines (46 loc) · 2.01 KB
/
extract_sgpt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import os
import json
import sys
def write_file(file_path, content):
    """Write ``content`` to ``file_path`` as UTF-8 text.

    The file is opened in ``'w'`` mode, so it is created if missing and any
    existing contents are replaced.
    """
    handle = open(file_path, mode='w', encoding='utf-8')
    # The context manager closes the handle even if the write fails.
    with handle:
        handle.write(content)
def extract_conversation_pair(conversations, question_folder, answer_folder):
    """Write every question/answer exchange of each conversation to text files.

    Each item of ``conversations`` is read as a mapping with an ``"id"`` key
    and a ``"conversations"`` key holding the ordered list of turns; each turn
    is a mapping with ``"from"`` and ``"value"`` keys.

    A pair is a ``"human"`` turn together with the next ``"gpt"`` turn that
    follows it.  Pair number ``i`` (1-based, counted per conversation) is
    written to ``<question_folder>/<id>_Q<i>.txt`` and
    ``<answer_folder>/<id>_A<i>.txt``.

    Turns are matched in order rather than by zipping all human turns with
    all gpt turns, so a conversation that does not strictly alternate cannot
    attach a reply to the wrong question:

    * a ``"gpt"`` turn with no unanswered question before it is skipped;
    * if several ``"human"`` turns occur in a row, the most recent one is the
      question that the next reply answers;
    * a trailing question without a reply is not written;
    * turns from any other speaker are ignored.

    Raises:
        KeyError: if a conversation lacks ``"id"``/``"conversations"`` or a
            turn lacks ``"from"`` (or ``"value"`` when it is needed).
    """
    for conversation in conversations:
        conversation_id = conversation["id"]
        pending_question = None
        pair_number = 0

        for turn in conversation["conversations"]:
            speaker = turn["from"]
            if speaker == "human":
                pending_question = turn["value"]
            elif speaker == "gpt" and pending_question is not None:
                pair_number += 1

                # Save question to the question folder
                question_file_path = os.path.join(
                    question_folder, f'{conversation_id}_Q{pair_number}.txt'
                )
                write_file(question_file_path, pending_question)

                # Save answer to the answer folder
                answer_file_path = os.path.join(
                    answer_folder, f'{conversation_id}_A{pair_number}.txt'
                )
                write_file(answer_file_path, turn["value"])

                # The question is answered; later replies need a new question.
                pending_question = None
def main():
    """Command-line entry point: split a conversation JSON file into Q/A files.

    Expects exactly one command-line argument, the path of a UTF-8 JSON file
    whose top-level value is a list of conversations.  The extracted questions
    are written under ``./Q_extracted`` and the answers under
    ``./A_extracted`` (both relative to the current working directory).

    On any usage or input error a message is printed to standard error and
    the process exits with status 1.  The input is loaded and validated
    before the output folders are created, so a failed run leaves no empty
    folders behind.
    """
    # Check if a filename argument is provided
    if len(sys.argv) != 2:
        script_name = os.path.basename(sys.argv[0]) if sys.argv else ""
        print(
            f"Usage: python {script_name or 'extract_sgpt.py'} <json_file>",
            file=sys.stderr,
        )
        sys.exit(1)

    json_filename = sys.argv[1]

    # Load JSON from the specified file
    try:
        with open(json_filename, 'r', encoding='utf-8') as json_file:
            json_data = json.load(json_file)
    except FileNotFoundError:
        print(f"Error: File '{json_filename}' not found.", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in file '{json_filename}'.", file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as exc:
        # Permission problems, a directory passed as the file, non-UTF-8 bytes, ...
        print(f"Error: Could not read file '{json_filename}': {exc}", file=sys.stderr)
        sys.exit(1)

    # Iterating a JSON object or scalar would fail later with an opaque error.
    if not isinstance(json_data, list):
        print(
            f"Error: Expected a JSON array of conversations in file '{json_filename}'.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Define the folder paths
    question_extracted_folder = './Q_extracted'
    answer_extracted_folder = './A_extracted'

    # Create folders if they don't exist
    os.makedirs(question_extracted_folder, exist_ok=True)
    os.makedirs(answer_extracted_folder, exist_ok=True)

    # Extract conversation pairs
    extract_conversation_pair(json_data, question_extracted_folder, answer_extracted_folder)
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()