Skip to content

Commit b651bfb

Browse files
committed
evaluation - create_excel_from_results: multiple adjustments to increase robustness when corrupted or incomplete data is present; added support for bigger datasets (column numbering)
1 parent b2e39d1 commit b651bfb

File tree

1 file changed

+55
-18
lines changed

1 file changed

+55
-18
lines changed

srpb_evaluation/scripts/create_excel_from_results.py

Lines changed: 55 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import csv
1111
import excel_sheet_defines
1212
import glob
13+
import re
1314
import sys
1415

1516
from string import ascii_uppercase
@@ -25,18 +26,29 @@ def get_log_dirs_planner(dir_path: str, planner_name: str, min_logs=3):
2526
dirnames = dirnames_underscore
2627
dirnames.extend(dirnames_dash)
2728
if len(dirnames) < min_logs:
28-
exit(f'Too few data entries for {planner_name} planner. Got: \r\n{dirnames}')
29-
return dirnames
29+
exit(f'Too few data entries for {planner_name} planner. Got {len(dirnames)}/{min_logs}: \r\n{dirnames}')
30+
31+
# got rid of the dirs that are marked to be ignored
32+
dirnames_valid = []
33+
for dirname in dirnames:
34+
if re.search('ignore', dirname, re.IGNORECASE):
35+
print(f'Ignoring a directory `{dirname}` from including to the overall `{planner_name}` planner results')
36+
continue
37+
dirnames_valid.append(dirname)
38+
39+
return dirnames_valid
3040

3141

3242
def get_log_result_files(log_dirs: str):
3343
result_files = []
3444
for log_dir in log_dirs:
3545
results_file = glob.glob(log_dir + '/' + '*results.txt*')
3646
if len(results_file) == 0:
37-
exit(f'Lack of results file at {log_dir}')
47+
print(f'Lack of results file at `{log_dir}` dir, skipping')
48+
continue
3849
elif len(results_file) > 1:
39-
exit(f'Multiple results files at {log_dir}')
50+
print(f'Multiple results files at `{log_dir}` dir, skipping')
51+
continue
4052
result_files.append(results_file[0])
4153
return result_files
4254

@@ -53,10 +65,17 @@ def read_results_file(results_file: str):
5365
for row in csv_result:
5466
key = str(row[0]).lstrip().rstrip()
5567
value_str = str(row[1])
56-
if value_str.find('.') != -1:
57-
value = float(value_str)
58-
else:
59-
value = int(value_str)
68+
try:
69+
if 'nan' in value_str:
70+
value = None
71+
print(f"NaN value detected for the key `{key}` in the results file `{results_file}`")
72+
elif value_str.find('.') != -1:
73+
value = float(value_str)
74+
else:
75+
value = int(value_str)
76+
except ValueError:
77+
print(f"Cannot convert a value `{value_str}` of a key `{key}` to a numeric, skipping `{results_file}`")
78+
return {}
6079
dict[key] = value
6180
return dict
6281

@@ -168,14 +187,21 @@ def cell_coords_to_sheet_cell_id(row: int, col: int):
168187
found_col = True
169188
break
170189
it_col = it_col + 1
171-
# repeat once again if required (doubling letters)
190+
# repeat once again if required (doubling letters) - columns A - ZZ cover a sufficient number of cases
172191
if not found_col:
173-
for c in ascii_uppercase:
174-
if it_col == col_w_offset:
175-
cell_col = 'A' + str(c)
176-
found_col = True
192+
# first letter of the cell address
193+
for first in ascii_uppercase:
194+
# second letter of the cell address
195+
for second in ascii_uppercase:
196+
# print(f'[cell_coords_to_sheet_cell_id] row {row}, col {col}, cell row {cell_row} | it col {it_col}, ADDR `{first}{second}`')
197+
if it_col == col_w_offset:
198+
cell_col = str(first) + str(second)
199+
found_col = True
200+
break
201+
it_col = it_col + 1
202+
# break the outer loop if possible
203+
if found_col:
177204
break
178-
it_col = it_col + 1
179205
if not found_col:
180206
exit(f"Could not find a valid column ID for ({row}, {col}) configuration")
181207

@@ -205,9 +231,17 @@ def get_sheet_datacell(planner: str, trial: int, result_key: str, results_total:
205231

206232
# counting rows from the start - exemplary key to iterate through metrics
207233
metric_counter = 0
208-
results_example = results_total[planner][0]
234+
try:
235+
results_example = results_total[planner][0]
236+
except IndexError:
237+
print(
238+
f'The planner `{planner}` does not seem to contain any keys with metric names. '
239+
f'Got `{results_total[planner]}` while searching for the metric `{result_key}` for the trial `{trial}`'
240+
)
241+
return None
242+
209243
if not result_key in results_example.keys():
210-
exit(f'Cannot proceed as results do not contain that key: {result_key}. Available keys: {results_example.keys()}')
244+
exit(f'Cannot proceed as results do not contain that key: `{result_key}`. Available keys: `{results_example.keys()}`')
211245

212246
found = False
213247
for key in results_example:
@@ -251,6 +285,8 @@ def calculate_sheet(wb: Workbook, planner_names: List[str], results_total: Dict,
251285
planner_trial_last_id = planner_trials - 1
252286
cell_begin = get_sheet_datacell(planner, 0, metric, results)
253287
cell_end = get_sheet_datacell(planner, planner_trial_last_id, metric, results)
288+
if cell_begin == None or cell_end == None:
289+
continue
254290
# fill spreadsheet
255291
ws[col_header + str(row_metric_start + m_index)] = metric
256292
ws[col_planner + str(row_start)] = planner
@@ -307,7 +343,7 @@ def calculate_sheet(wb: Workbook, planner_names: List[str], results_total: Dict,
307343
calculate_sheet(ws, planners, results, 'MEDIAN')
308344

309345
# Prepare name of the output file
310-
output_filename = 'results'
346+
output_filename = 'results' + '_' + Path(logs_dir).name
311347
for planner in planners:
312348
output_filename = output_filename + '_' + planner
313349
output_filename = output_filename.rstrip('_') + '.xlsx'
@@ -322,5 +358,6 @@ def calculate_sheet(wb: Workbook, planner_names: List[str], results_total: Dict,
322358
# cell will probably return None
323359
# Ref1: https://itecnote.com/tecnote/python-openpyxl-data_onlytrue-returning-none/
324360
# Ref2: https://groups.google.com/g/openpyxl-users/c/GbBOnOa8g7Y
325-
print(f'Consider opening the results file and saving it in Excel/LibreOffice Calc (without any modifications).')
361+
print(f'Consider opening the results file and saving it with Excel/LibreOffice Calc (without any modifications).')
326362
print(f'It will produce cached values based on formulas written (`openpyxl` library is not able to do so).')
363+
print(f'This is a necessary step when one wants to use the script that creates a LaTeX table from a spreadsheet')

0 commit comments

Comments
 (0)