-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathgenerate_batch_results.py
More file actions
238 lines (206 loc) · 10.1 KB
/
Copy pathgenerate_batch_results.py
File metadata and controls
238 lines (206 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#!/usr/bin/env python3
import os
import json
import glob
from pathlib import Path
from typing import Dict, Any, List, Optional
def find_trajectory_file(task_dir: str) -> Optional[str]:
"""
Finds the trajectory file in the given task directory.
"""
try:
# First, look for .traj files directly in the task_dir
# Then, look in immediate subdirectories for .traj files
traj_files = glob.glob(os.path.join(task_dir, '*.traj')) + \
glob.glob(os.path.join(task_dir, '**', '*.traj'), recursive=True)
for filepath in traj_files:
if filepath.endswith('.traj'):
return filepath
except (FileNotFoundError, PermissionError) as e:
print(f"Error accessing directory {task_dir}: {e}")
return None
def parse_trajectory_stats(trajectory_path: str) -> Optional[Dict[str, Any]]:
"""
Parses model_stats information from the trajectory file.
"""
try:
if not os.path.exists(trajectory_path):
print(f"Trajectory file not found: {trajectory_path}")
return None
# Try loading the entire file directly as a JSON object
try:
with open(trajectory_path, 'r', encoding='utf-8') as f:
data = json.load(f)
# Check if it contains info.model_stats
if isinstance(data, dict):
if 'info' in data and isinstance(data['info'], dict) and \
'model_stats' in data['info'] and isinstance(data['info']['model_stats'], dict):
return data['info']['model_stats']
if 'model_stats' in data and isinstance(data['model_stats'], dict):
return data['model_stats']
except (json.JSONDecodeError, MemoryError) as e:
print(f"Failed to load trajectory as single JSON object: {e}. Trying line-by-line parsing.")
# If the above method fails, try to find model_stats from the end of the file
with open(trajectory_path, 'r', encoding='utf-8') as f:
f.seek(0, 2) # Move to the end of the file
file_size = f.tell()
chunk_size = 100000 # Read last 100KB
position = file_size
buffer = ""
while position > 0:
read_size = min(chunk_size, position)
position -= read_size
f.seek(position)
# Read chunk and concatenate with the unparsed part of the previous chunk
current_chunk = f.read(read_size)
content_to_search = current_chunk + buffer
# Find "model_stats": { ... } structure
last_match_pos = -1
search_start_pos = 0
while True:
match_pos = content_to_search.find('"model_stats":', search_start_pos)
if match_pos != -1:
last_match_pos = match_pos
search_start_pos = match_pos + 1
else:
break
if last_match_pos != -1:
# Found "model_stats":, try to extract the JSON object after it
json_text_start = content_to_search.find('{', last_match_pos + len('"model_stats":'))
if json_text_start != -1:
brace_count = 0
json_obj_str = ""
for i in range(json_text_start, len(content_to_search)):
char = content_to_search[i]
json_obj_str += char
if char == '{':
brace_count += 1
elif char == '}':
brace_count -= 1
if brace_count == 0:
try:
model_stats = json.loads(json_obj_str)
print(f"Successfully parsed model_stats from chunk in {trajectory_path}")
return model_stats
except json.JSONDecodeError:
# Parsing failed, possibly incomplete or malformed JSON object
print(f"JSONDecodeError for model_stats chunk")
# Continue in the outer loop to find the next occurrence of model_stats or read more content
break
# If brace_count is not 0 when the inner loop finishes, the JSON object might span across chunks
if brace_count != 0:
buffer = content_to_search[json_text_start:]
else:
buffer = ""
else:
buffer = content_to_search
else:
buffer = current_chunk
# If already read to the beginning of the file, and still no result in buffer, parsing failed
if position == 0:
break
print(f"Could not find or parse valid model_stats in trajectory file: {trajectory_path}")
return None
except Exception as e:
print(f"Error reading or parsing trajectory file {trajectory_path}: {e}")
return None
def main():
# Path configuration
base_dir = "/data/code/agent_new/SWE-agent/trajectories/youwang-claude4-opus"
openai_dir = os.path.join(base_dir, "openai")
batch_results_file = os.path.join(base_dir, "batch_results.jsonl")
# Results list
results = []
# Iterate through all task directories under the openai directory
print(f"Scanning directory: {openai_dir}")
task_dirs = glob.glob(os.path.join(openai_dir, "claude-opus-4-*"))
print(f"Found {len(task_dirs)} task directories")
for task_dir in task_dirs:
task_name = os.path.basename(task_dir)
print(f"Processing task directory: {task_name}")
# Extract the original task name (without extension) from the directory name
# E.g., claude-opus-4-20250514-Faker_01 -> Faker_01
if "-" in task_name:
parts = task_name.split("-")
if len(parts) >= 5: # claude-opus-4-20250514-Faker_01
original_task_name = parts[4] # Get the Faker_01 part
if len(parts) > 5: # Handle additional hyphens that might be in the task name
original_task_name = "-".join(parts[4:])
else:
original_task_name = task_name
else:
original_task_name = task_name
# Add .md extension to get the full task name
md_task_name = f"{original_task_name}.md"
# Find the trajectory file
traj_file = find_trajectory_file(task_dir)
if traj_file:
print(f"Found trajectory file: {traj_file}")
# Parse model_stats
model_stats = parse_trajectory_stats(traj_file)
if model_stats:
# Build the result entry
entry = {
"task_name": md_task_name,
"run_id": task_name,
"success": True, # Assume success if stats are present
"instance_cost": model_stats.get("instance_cost"),
"tokens_sent": model_stats.get("tokens_sent"),
"tokens_received": model_stats.get("tokens_received"),
"api_calls": model_stats.get("api_calls"),
"error": None
}
results.append(entry)
print(
f"Added stats for {md_task_name}: cost={entry['instance_cost']}, tokens_sent={entry['tokens_sent']}")
else:
# Could not parse model_stats but trajectory file was found
entry = {
"task_name": md_task_name,
"run_id": task_name,
"success": False,
"instance_cost": None,
"tokens_sent": None,
"tokens_received": None,
"api_calls": None,
"error": "Could not parse model_stats from trajectory file"
}
results.append(entry)
print(f"Failed to parse stats for {md_task_name}")
else:
# Trajectory file not found
entry = {
"task_name": md_task_name,
"run_id": task_name,
"success": False,
"instance_cost": None,
"tokens_sent": None,
"tokens_received": None,
"api_calls": None,
"error": "Trajectory file not found"
}
results.append(entry)
print(f"No trajectory file found for {md_task_name}")
# Save results to batch_results.jsonl
print(f"\nSaving {len(results)} results to {batch_results_file}")
with open(batch_results_file, 'w', encoding='utf-8') as f:
for entry in results:
f.write(json.dumps(entry, ensure_ascii=False) + '\n')
# Calculate total cost and statistics
total_cost = sum(entry.get("instance_cost") or 0 for entry in results)
total_tokens_sent = sum(entry.get("tokens_sent") or 0 for entry in results)
total_tokens_received = sum(entry.get("tokens_received") or 0 for entry in results)
total_api_calls = sum(entry.get("api_calls") or 0 for entry in results)
successful_tasks = sum(1 for entry in results if entry.get("success", False))
print("\n--- Overall Statistics ---")
print(f"Total tasks processed: {len(results)}")
print(f"Successful tasks: {successful_tasks}")
print(f"Failed tasks: {len(results) - successful_tasks}")
print(f"Total cost: ${total_cost:.4f}")
print(f"Total tokens sent: {total_tokens_sent:,}")
print(f"Total tokens received: {total_tokens_received:,}")
print(f"Total API calls: {total_api_calls:,}")
if successful_tasks > 0:
print(f"Average cost per successful task: ${total_cost / successful_tasks:.4f}")
if __name__ == "__main__":
main()