@@ -1003,7 +1003,7 @@ def convert_multispecies_df_units(read_instance, stats_df, zstats, base_plot_typ
10031003 return stats_df
10041004
10051005
1006- def handle_test_or_save_df (read_instance , df , filename , path , tests_generate_output , msgs ):
1006+ def handle_test_or_save_df (read_instance , df , filename , path , tests_generate_output , msgs , decimal_places ):
10071007 """
10081008 Save dataframe or assert if dataframe generates the same outputs as the dataframes saved in tests folder
10091009
@@ -1021,8 +1021,11 @@ def handle_test_or_save_df(read_instance, df, filename, path, tests_generate_out
10211021 Indicates if we want to regenerate dataframes saved in tests folder
10221022 msgs : list
10231023 Text to show after downloading file
1024+ decimal_places : int
1025+ Decimal places to round the data to when saving dataframe
10241026 """
10251027
1028+ df = df .round (decimal_places )
10261029 if read_instance .tests :
10271030 generated_output = df
10281031 generated_output = generated_output .replace ('' , np .nan )
@@ -1044,7 +1047,7 @@ def handle_test_or_save_df(read_instance, df, filename, path, tests_generate_out
10441047 f"{ path } /{ filename } .csv" ,
10451048 parse_dates = parse_dates
10461049 )
1047- print ('Expected_output' )
1050+ print (f 'Expected_output ( { f" { path } / { filename } .csv" } ) ' )
10481051 print (expected_output )
10491052 if 'metadata' in filename :
10501053 expected_output ["value" ] = expected_output ["value" ].astype (str )
@@ -1148,6 +1151,7 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
11481151 for data_label in canvas_instance .plot_elements [base_plot_type ][plot_element_varname ]
11491152 for key in canvas_instance .plot_elements [base_plot_type ][plot_element_varname ][data_label ].keys ()
11501153 })
1154+
11511155 element_types_to_save = []
11521156 if read_instance .mode == 'library' :
11531157 # in tests do not ask
@@ -1187,9 +1191,11 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
11871191 else "concentration" if base_plot_type == "distribution"
11881192 else "x"
11891193 )
1194+ decimal_places = canvas_instance .plot_characteristics [plot_type ]['round_decimal_places' ]['csv' ]
11901195
11911196 msgs = []
11921197 combined_dfs = {}
1198+ boxplot_accumulator = {}
11931199
11941200 for data_label in canvas_instance .plot_elements [base_plot_type ][plot_element_varname ]:
11951201 for element_type in canvas_instance .plot_elements [base_plot_type ][plot_element_varname ][data_label ]:
@@ -1253,7 +1259,8 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
12531259 filename ,
12541260 path ,
12551261 tests_generate_output ,
1256- msgs
1262+ msgs ,
1263+ decimal_places
12571264 )
12581265
12591266 elif base_plot_type in ['timeseries' , 'distribution' , 'scatter' , 'fairmode-target' ,
@@ -1274,7 +1281,6 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
12741281 "dataset" : annotation .get_text ().split ('|' )[0 ].strip (),
12751282 "annotation" : annotation .get_text ().split ('|' )[1 ].strip ()
12761283 })
1277-
12781284 df = pd .DataFrame (data )
12791285
12801286 filename = f"{ plot_type } _{ data_label } _{ element_type } " + (
@@ -1288,77 +1294,81 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
12881294 filename ,
12891295 path ,
12901296 tests_generate_output ,
1291- msgs
1297+ msgs ,
1298+ decimal_places
12921299 )
12931300
1294- # extract plot data
12951301 else :
1296-
1297- data = []
1298-
1299- # extract patches in boxplot
1300- if isinstance (plot_element , matplotlib .patches .PathPatch ):
1301- xy = plot_element .get_path ().vertices
1302+ if base_plot_type == "boxplot" :
1303+
1304+ # skip patch
1305+ if isinstance (plot_element , matplotlib .patches .PathPatch ):
1306+ continue
1307+
1308+ elif isinstance (plot_element , matplotlib .lines .Line2D ):
1309+ y_value = plot_element .get_ydata ()[0 ]
1310+
1311+ if data_label not in boxplot_accumulator :
1312+ boxplot_accumulator [data_label ] = []
1313+ boxplot_accumulator [data_label ].append (y_value )
13021314 else :
1315+ data = []
13031316 xy = plot_element .get_xydata ()
1304-
1305- for x , y in xy :
1306- data .append ({
1307- # convert time from unix to actual
1308- x_column :
1309- pd .to_datetime (
1310- x , unit = "D" , utc = True ).round ("s" )
1311- if base_plot_type == "timeseries" else x ,
1312-
1313- "y" if base_plot_type in ["boxplot" , "fairmode-target" ]
1314- else data_label : y ,
1315- })
1316-
1317- df = pd .DataFrame (data )
1318-
1319- filename = f"{ plot_type } _{ data_label } _{ element_type } " + (
1320- f"_{ plot_element_i } " if len (
1321- plot_elements ) > 1 else ""
1322- )
1323-
1324- # combine dataframes for some plots
1325- if base_plot_type in [
1326- "timeseries" ,
1327- "scatter" ,
1328- "distribution" ,
1329- "periodic" ,
1330- "periodic-violin" ,
1331- "taylor"
1332- ]:
1333- # one dataframe per plot element
1334- key = (element_type , plot_element_i )
1335- df = df .set_index (x_column )
1336- value_column = df .columns [0 ]
1337-
1338- # column becomes the data label
1339- df = df .rename (columns = {
1340- value_column : data_label
1341- })
1342-
1343- if key not in combined_dfs :
1344- combined_dfs [key ] = df
1345-
1317+ for x , y in xy :
1318+ data .append ({
1319+ # convert time from unix to actual
1320+ x_column :
1321+ pd .to_datetime (x , unit = "D" , utc = True ).round ("s" )
1322+ if base_plot_type == "timeseries" else x ,
1323+
1324+ "y" if base_plot_type in ["fairmode-target" ]
1325+ else data_label : y ,
1326+ })
1327+ df = pd .DataFrame (data )
1328+
1329+ # combine dataframes for some plots
1330+ if base_plot_type in [
1331+ "timeseries" ,
1332+ "scatter" ,
1333+ "distribution" ,
1334+ "periodic" ,
1335+ "periodic-violin" ,
1336+ "taylor"
1337+ ]:
1338+ # one dataframe per plot element
1339+ key = (element_type , plot_element_i )
1340+ df = df .set_index (x_column )
1341+ value_column = df .columns [0 ]
1342+
1343+ # column becomes the data label
1344+ df = df .rename (columns = {
1345+ value_column : data_label
1346+ })
1347+
1348+ if key not in combined_dfs :
1349+ combined_dfs [key ] = df
1350+
1351+ else :
1352+ combined_dfs [key ] = pd .concat (
1353+ [combined_dfs [key ], df ],
1354+ axis = 1
1355+ )
1356+
1357+ # for other plot types save data per data label
13461358 else :
1347- combined_dfs [key ] = pd .concat (
1348- [combined_dfs [key ], df ],
1349- axis = 1
1359+ filename = f"{ plot_type } _{ data_label } _{ element_type } " + (
1360+ f"_{ plot_element_i } " if len (
1361+ plot_elements ) > 1 else ""
1362+ )
1363+ msgs = handle_test_or_save_df (
1364+ read_instance ,
1365+ df ,
1366+ filename ,
1367+ path ,
1368+ tests_generate_output ,
1369+ msgs ,
1370+ decimal_places
13501371 )
1351-
1352- else :
1353-
1354- msgs = handle_test_or_save_df (
1355- read_instance ,
1356- df ,
1357- filename ,
1358- path ,
1359- tests_generate_output ,
1360- msgs
1361- )
13621372
13631373 elif base_plot_type == 'metadata' :
13641374 text = plot_element .get_text ().split ('\n ' )
@@ -1383,7 +1393,7 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
13831393 f"_{ plot_element_i } " if len (plot_elements ) > 1 else ""
13841394 )
13851395 msgs = handle_test_or_save_df (
1386- read_instance , df , filename , path , tests_generate_output , msgs )
1396+ read_instance , df , filename , path , tests_generate_output , msgs , decimal_places )
13871397
13881398 elif base_plot_type == 'map' :
13891399
@@ -1446,7 +1456,7 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
14461456 filename = f"{ plot_type } _{ element_type } _{ label } "
14471457 df = pd .DataFrame (data )
14481458 msgs = handle_test_or_save_df (
1449- read_instance , df , filename , path , tests_generate_output , msgs )
1459+ read_instance , df , filename , path , tests_generate_output , msgs , decimal_places )
14501460
14511461 # save combined dataframes into one file per plot element
14521462 if base_plot_type in [
@@ -1455,30 +1465,56 @@ def download_plot_data_to_csv(read_instance, canvas_instance, base_plot_type, pl
14551465 "distribution" ,
14561466 "periodic" ,
14571467 "periodic-violin" ,
1458- "taylor"
1468+ "taylor" ,
1469+ "boxplot"
14591470 ]:
1460- for (element_type , plot_element_i ), df in combined_dfs .items ():
14611471
1472+ if base_plot_type == "boxplot" :
1473+
1474+ stats = ["whisker_low" , "q1" , "median" , "q3" , "whisker_high" ]
1475+ data = {}
1476+
1477+ for label , stats_list in boxplot_accumulator .items ():
1478+ stats_list_sorted = sorted (stats_list )
1479+ data [label ] = dict (zip (stats , stats_list_sorted ))
1480+
1481+ df = pd .DataFrame (data )
14621482 df = df .reset_index ()
1463- filename = (
1464- f"{ plot_type } _{ element_type } "
1465- + (
1466- f"_{ plot_element_i } "
1467- if len (plot_elements ) > 1
1468- else ""
1469- )
1470- )
1483+ filename = "boxplot"
14711484
14721485 msgs = handle_test_or_save_df (
14731486 read_instance ,
14741487 df ,
14751488 filename ,
14761489 path ,
14771490 tests_generate_output ,
1478- msgs
1491+ msgs ,
1492+ decimal_places
14791493 )
1494+
1495+ else :
1496+ for (element_type , plot_element_i ), df in combined_dfs .items ():
1497+ df = df .reset_index ()
1498+ filename = (
1499+ f"{ plot_type } _{ element_type } "
1500+ + (
1501+ f"_{ plot_element_i } "
1502+ if len (plot_elements ) > 1
1503+ else ""
1504+ )
1505+ )
1506+
1507+ msgs = handle_test_or_save_df (
1508+ read_instance ,
1509+ df ,
1510+ filename ,
1511+ path ,
1512+ tests_generate_output ,
1513+ msgs ,
1514+ decimal_places
1515+ )
14801516
14811517 if msgs :
14821518 msg = f'Saving { plot_type } figure data to CSV:'
14831519 msg += '' .join (msgs )
1484- show_message (read_instance , msg )
1520+ show_message (read_instance , msg )
0 commit comments