Skip to content
Open
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
bb7d854
added BasketballDataset class
not-heavychevy Apr 10, 2025
2abeeff
added BasketballPitchDimensions class
not-heavychevy Apr 10, 2025
bd59522
added graph settings
not-heavychevy Apr 10, 2025
8a83938
added optimized graph converter
not-heavychevy Apr 10, 2025
f5071c6
added ball handling
not-heavychevy Apr 10, 2025
26d6d85
added init files
not-heavychevy Apr 10, 2025
f2d164b
bugfix dataset load() bug
not-heavychevy Apr 10, 2025
d86c0af
added tests
not-heavychevy Apr 10, 2025
d1c0c73
added additional fields computation
not-heavychevy Apr 10, 2025
64f5ee3
BasketballDataset inherits from DefaultDataset
not-heavychevy Apr 12, 2025
835cd59
bugfix
not-heavychevy Apr 12, 2025
98f09ae
files read with kloppy.io
not-heavychevy Apr 19, 2025
0502aa7
added norm parameters
not-heavychevy Apr 19, 2025
d2f6b52
refactor: move get_dataframe to DefaultDataset
not-heavychevy Apr 20, 2025
53ea444
created post_init
not-heavychevy Apr 20, 2025
3482bf9
added self.settings to BasketballDataset
not-heavychevy Apr 20, 2025
51a6657
added add_dummy_labels and add_graph_ids
not-heavychevy Apr 20, 2025
1352f80
rewritten tests for dataset.py
not-heavychevy Apr 21, 2025
b0fc5c1
Refactor BasketballPitchDimensions
not-heavychevy Apr 25, 2025
1e04bfd
added tests for BasketballPitchDimensions
not-heavychevy Apr 25, 2025
627fae8
Refactor BasketballGraphSettings
not-heavychevy Apr 25, 2025
1bdd740
added tests for BasketballGraphSettings
not-heavychevy Apr 25, 2025
7c64156
Merge PitchDimensions and GraphSettings
not-heavychevy Apr 25, 2025
a70739c
graph_settings test update
not-heavychevy Apr 25, 2025
ebe0914
import bugs fix
not-heavychevy Apr 25, 2025
2dcd3fb
graph_converter refactoring
not-heavychevy Apr 26, 2025
4b96024
dataset separator bugfix
not-heavychevy Apr 26, 2025
af3a02a
added tests for graph_converter
not-heavychevy Apr 26, 2025
8a47337
moved the functionality to “features”
not-heavychevy Apr 26, 2025
633afca
tests update
not-heavychevy Apr 26, 2025
7463b1e
tests fix
not-heavychevy Apr 26, 2025
dcfa8e4
Deprecate speed/acceleration thresholds
not-heavychevy Apr 26, 2025
1b5bc3b
unify data/settings access on DefaultDataset
not-heavychevy Apr 26, 2025
7eb2081
Refactor _convert to use polars methods
not-heavychevy Apr 26, 2025
b0b9d72
Add unified graph-export API to GraphConverter
not-heavychevy Apr 26, 2025
e55d30e
added new tests for public export API
not-heavychevy Apr 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refactor _convert to use polars methods
  • Loading branch information
not-heavychevy committed Apr 26, 2025
commit 7eb208147c0424da510caa8c537853e31bda1079
65 changes: 51 additions & 14 deletions unravel/basketball/graphs/graph_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,27 +95,64 @@ def _sport_specific_checks(self) -> None:
raise ValueError(f"Missing graph_id column '{self._exprs_variables['graph_id_col']}'")

def _convert(self) -> pl.DataFrame:
    """
    Convert the raw Polars DataFrame into a graph-structured DataFrame.

    Rows of ``self.dataset_obj.data`` are grouped by ``frame_id``. For each
    frame the node feature columns and the ``team`` column are collected
    into lists, rebuilt into one record per entity, and handed to the
    feature helpers.

    Returns:
        A DataFrame with one row per unique ``frame_id`` and the columns:
        - id: frame identifier
        - x: node feature matrix (np.ndarray)
        - a: adjacency matrix (np.ndarray)
        - e: edge feature matrix (np.ndarray)
        - y: first value of the label column within that frame
    """
    from .features.node_features import compute_node_features
    from .features.adjacency_matrix import compute_adjacency_matrix
    from .features.edge_features import compute_edge_features

    df = self.dataset_obj.data
    node_cols = self._exprs_variables["node_feature_cols"]
    label_col = self._exprs_variables["label_col"]

    # maintain_order=True keeps frames in order of first appearance, so the
    # output is deterministic; a plain group_by gives no ordering guarantee.
    aggregated = df.group_by("frame_id", maintain_order=True).agg(
        # Inside agg, a bare pl.col(c) collects the group's values into a list.
        *[pl.col(c).alias(f"{c}_list") for c in node_cols],
        pl.col("team").alias("team_list"),
        pl.col(label_col).first().alias("y"),
    )

    rows = []
    # iter_rows streams one dict at a time instead of materialising them all.
    for row in aggregated.iter_rows(named=True):
        # Rebuild per-entity dicts from the parallel lists. Driving the zip
        # from team_list (always aggregated) avoids indexing node_cols[0],
        # which would fail when no node feature columns are configured.
        feature_lists = [row[f"{c}_list"] for c in node_cols]
        records = [
            dict(zip(node_cols, values)) | {"team": team}
            for team, *values in zip(row["team_list"], *feature_lists)
        ]

        # Node matrix plus the per-node team vector used for the adjacency.
        x, teams = compute_node_features(
            records,
            normalize_coordinates=self.settings.normalize_coordinates,
            pitch_dimensions=self.settings.pitch_dimensions,
            node_feature_cols=node_cols,
        )
        a = compute_adjacency_matrix(teams, self_loop=self.settings.self_loop_ball)
        e = compute_edge_features(x)

        rows.append(
            {
                "id": row["frame_id"],
                "x": x,
                "a": a,
                "e": e,
                "y": row["y"],
            }
        )

    return pl.DataFrame(rows)