Skip to content

Commit 54d78e5

Browse files
committed
Include decimals in saved CSV and extract boxplot stats
1 parent cb59e82 commit 54d78e5

265 files changed

Lines changed: 577144 additions & 577419 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

notebooks/plot_download.ipynb

Lines changed: 17 additions & 34 deletions
Large diffs are not rendered by default.

providentia/plot_aux.py

Lines changed: 118 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,7 +1003,7 @@ def convert_multispecies_df_units(read_instance, stats_df, zstats, base_plot_typ
10031003
return stats_df
10041004

10051005

1006-
def handle_test_or_save_df(read_instance, df, filename, path, tests_generate_output, msgs):
1006+
def handle_test_or_save_df(read_instance, df, filename, path, tests_generate_output, msgs, decimal_places):
10071007
"""
10081008
Save dataframe or assert if dataframe generates the same outputs as the dataframes saved in tests folder
10091009
@@ -1021,8 +1021,11 @@ def handle_test_or_save_df(read_instance, df, filename, path, tests_generate_out
10211021
Indicates if we want to regenerate dataframes saved in tests folder
10221022
msgs : list
10231023
Text to show after downloading file
1024+
decimal_places : int
1025+
Decimal places to round the data to when saving dataframe
10241026
"""
10251027

1028+
df = df.round(decimal_places)
10261029
if read_instance.tests:
10271030
generated_output = df
10281031
generated_output = generated_output.replace('', np.nan)
@@ -1044,7 +1047,7 @@ def handle_test_or_save_df(read_instance, df, filename, path, tests_generate_out
10441047
f"{path}/{filename}.csv",
10451048
parse_dates=parse_dates
10461049
)
1047-
print('Expected_output')
1050+
print(f'Expected_output ({f"{path}/{filename}.csv"})')
10481051
print(expected_output)
10491052
if 'metadata' in filename:
10501053
expected_output["value"] = expected_output["value"].astype(str)
@@ -1148,6 +1151,7 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
11481151
for data_label in canvas_instance.plot_elements[base_plot_type][plot_element_varname]
11491152
for key in canvas_instance.plot_elements[base_plot_type][plot_element_varname][data_label].keys()
11501153
})
1154+
11511155
element_types_to_save = []
11521156
if read_instance.mode == 'library':
11531157
# in tests do not ask
@@ -1187,9 +1191,11 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
11871191
else "concentration" if base_plot_type == "distribution"
11881192
else "x"
11891193
)
1194+
decimal_places = canvas_instance.plot_characteristics[plot_type]['round_decimal_places']['csv']
11901195

11911196
msgs = []
11921197
combined_dfs = {}
1198+
boxplot_accumulator = {}
11931199

11941200
for data_label in canvas_instance.plot_elements[base_plot_type][plot_element_varname]:
11951201
for element_type in canvas_instance.plot_elements[base_plot_type][plot_element_varname][data_label]:
@@ -1253,7 +1259,8 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
12531259
filename,
12541260
path,
12551261
tests_generate_output,
1256-
msgs
1262+
msgs,
1263+
decimal_places
12571264
)
12581265

12591266
elif base_plot_type in ['timeseries', 'distribution', 'scatter', 'fairmode-target',
@@ -1274,7 +1281,6 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
12741281
"dataset": annotation.get_text().split('|')[0].strip(),
12751282
"annotation": annotation.get_text().split('|')[1].strip()
12761283
})
1277-
12781284
df = pd.DataFrame(data)
12791285

12801286
filename = f"{plot_type}_{data_label}_{element_type}" + (
@@ -1288,77 +1294,81 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
12881294
filename,
12891295
path,
12901296
tests_generate_output,
1291-
msgs
1297+
msgs,
1298+
decimal_places
12921299
)
12931300

1294-
# extract plot data
12951301
else:
1296-
1297-
data = []
1298-
1299-
# extract patches in boxplot
1300-
if isinstance(plot_element, matplotlib.patches.PathPatch):
1301-
xy = plot_element.get_path().vertices
1302+
if base_plot_type == "boxplot":
1303+
1304+
# skip patch
1305+
if isinstance(plot_element, matplotlib.patches.PathPatch):
1306+
continue
1307+
1308+
elif isinstance(plot_element, matplotlib.lines.Line2D):
1309+
y_value = plot_element.get_ydata()[0]
1310+
1311+
if data_label not in boxplot_accumulator:
1312+
boxplot_accumulator[data_label] = []
1313+
boxplot_accumulator[data_label].append(y_value)
13021314
else:
1315+
data = []
13031316
xy = plot_element.get_xydata()
1304-
1305-
for x, y in xy:
1306-
data.append({
1307-
# convert time from unix to actual
1308-
x_column:
1309-
pd.to_datetime(
1310-
x, unit="D", utc=True).round("s")
1311-
if base_plot_type == "timeseries" else x,
1312-
1313-
"y" if base_plot_type in ["boxplot", "fairmode-target"]
1314-
else data_label: y,
1315-
})
1316-
1317-
df = pd.DataFrame(data)
1318-
1319-
filename = f"{plot_type}_{data_label}_{element_type}" + (
1320-
f"_{plot_element_i}" if len(
1321-
plot_elements) > 1 else ""
1322-
)
1323-
1324-
# combine dataframes for some plots
1325-
if base_plot_type in [
1326-
"timeseries",
1327-
"scatter",
1328-
"distribution",
1329-
"periodic",
1330-
"periodic-violin",
1331-
"taylor"
1332-
]:
1333-
# one dataframe per plot element
1334-
key = (element_type, plot_element_i)
1335-
df = df.set_index(x_column)
1336-
value_column = df.columns[0]
1337-
1338-
# column becomes the data label
1339-
df = df.rename(columns={
1340-
value_column: data_label
1341-
})
1342-
1343-
if key not in combined_dfs:
1344-
combined_dfs[key] = df
1345-
1317+
for x, y in xy:
1318+
data.append({
1319+
# convert time from unix to actual
1320+
x_column:
1321+
pd.to_datetime(x, unit="D", utc=True).round("s")
1322+
if base_plot_type == "timeseries" else x,
1323+
1324+
"y" if base_plot_type in ["fairmode-target"]
1325+
else data_label: y,
1326+
})
1327+
df = pd.DataFrame(data)
1328+
1329+
# combine dataframes for some plots
1330+
if base_plot_type in [
1331+
"timeseries",
1332+
"scatter",
1333+
"distribution",
1334+
"periodic",
1335+
"periodic-violin",
1336+
"taylor"
1337+
]:
1338+
# one dataframe per plot element
1339+
key = (element_type, plot_element_i)
1340+
df = df.set_index(x_column)
1341+
value_column = df.columns[0]
1342+
1343+
# column becomes the data label
1344+
df = df.rename(columns={
1345+
value_column: data_label
1346+
})
1347+
1348+
if key not in combined_dfs:
1349+
combined_dfs[key] = df
1350+
1351+
else:
1352+
combined_dfs[key] = pd.concat(
1353+
[combined_dfs[key], df],
1354+
axis=1
1355+
)
1356+
1357+
# for other plot types save data per data label
13461358
else:
1347-
combined_dfs[key] = pd.concat(
1348-
[combined_dfs[key], df],
1349-
axis=1
1359+
filename = f"{plot_type}_{data_label}_{element_type}" + (
1360+
f"_{plot_element_i}" if len(
1361+
plot_elements) > 1 else ""
1362+
)
1363+
msgs = handle_test_or_save_df(
1364+
read_instance,
1365+
df,
1366+
filename,
1367+
path,
1368+
tests_generate_output,
1369+
msgs,
1370+
decimal_places
13501371
)
1351-
1352-
else:
1353-
1354-
msgs = handle_test_or_save_df(
1355-
read_instance,
1356-
df,
1357-
filename,
1358-
path,
1359-
tests_generate_output,
1360-
msgs
1361-
)
13621372

13631373
elif base_plot_type == 'metadata':
13641374
text = plot_element.get_text().split('\n')
@@ -1383,7 +1393,7 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
13831393
f"_{plot_element_i}" if len(plot_elements) > 1 else ""
13841394
)
13851395
msgs = handle_test_or_save_df(
1386-
read_instance, df, filename, path, tests_generate_output, msgs)
1396+
read_instance, df, filename, path, tests_generate_output, msgs, decimal_places)
13871397

13881398
elif base_plot_type == 'map':
13891399

@@ -1446,7 +1456,7 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
14461456
filename = f"{plot_type}_{element_type}_{label}"
14471457
df = pd.DataFrame(data)
14481458
msgs = handle_test_or_save_df(
1449-
read_instance, df, filename, path, tests_generate_output, msgs)
1459+
read_instance, df, filename, path, tests_generate_output, msgs, decimal_places)
14501460

14511461
# save combined dataframes into one file per plot element
14521462
if base_plot_type in [
@@ -1455,30 +1465,56 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
14551465
"distribution",
14561466
"periodic",
14571467
"periodic-violin",
1458-
"taylor"
1468+
"taylor",
1469+
"boxplot"
14591470
]:
1460-
for (element_type, plot_element_i), df in combined_dfs.items():
14611471

1472+
if base_plot_type == "boxplot":
1473+
1474+
stats = ["whisker_low", "q1", "median", "q3", "whisker_high"]
1475+
data = {}
1476+
1477+
for label, stats_list in boxplot_accumulator.items():
1478+
stats_list_sorted = sorted(stats_list)
1479+
data[label] = dict(zip(stats, stats_list_sorted))
1480+
1481+
df = pd.DataFrame(data)
14621482
df = df.reset_index()
1463-
filename = (
1464-
f"{plot_type}_{element_type}"
1465-
+ (
1466-
f"_{plot_element_i}"
1467-
if len(plot_elements) > 1
1468-
else ""
1469-
)
1470-
)
1483+
filename = "boxplot"
14711484

14721485
msgs = handle_test_or_save_df(
14731486
read_instance,
14741487
df,
14751488
filename,
14761489
path,
14771490
tests_generate_output,
1478-
msgs
1491+
msgs,
1492+
decimal_places
14791493
)
1494+
1495+
else:
1496+
for (element_type, plot_element_i), df in combined_dfs.items():
1497+
df = df.reset_index()
1498+
filename = (
1499+
f"{plot_type}_{element_type}"
1500+
+ (
1501+
f"_{plot_element_i}"
1502+
if len(plot_elements) > 1
1503+
else ""
1504+
)
1505+
)
1506+
1507+
msgs = handle_test_or_save_df(
1508+
read_instance,
1509+
df,
1510+
filename,
1511+
path,
1512+
tests_generate_output,
1513+
msgs,
1514+
decimal_places
1515+
)
14801516

14811517
if msgs:
14821518
msg = f'Saving {plot_type} figure data to CSV:'
14831519
msg += ''.join(msgs)
1484-
show_message(read_instance, msg)
1520+
show_message(read_instance, msg)

0 commit comments

Comments
 (0)