Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
bb7d854
added BasketballDataset class
not-heavychevy Apr 10, 2025
2abeeff
added BasketballPitchDimensions class
not-heavychevy Apr 10, 2025
bd59522
added graph settings
not-heavychevy Apr 10, 2025
8a83938
added optimized graph converter
not-heavychevy Apr 10, 2025
f5071c6
added ball handling
not-heavychevy Apr 10, 2025
26d6d85
added init files
not-heavychevy Apr 10, 2025
f2d164b
bugfix dataset load() bug
not-heavychevy Apr 10, 2025
d86c0af
added tests
not-heavychevy Apr 10, 2025
d1c0c73
added additional fields computation
not-heavychevy Apr 10, 2025
64f5ee3
BasketballDataset inherits from DefaultDataset
not-heavychevy Apr 12, 2025
835cd59
bugfix
not-heavychevy Apr 12, 2025
98f09ae
files read with kloppy.io
not-heavychevy Apr 19, 2025
0502aa7
added norm parameters
not-heavychevy Apr 19, 2025
d2f6b52
refactor: move get_dataframe to DefaultDataset
not-heavychevy Apr 20, 2025
53ea444
created post_init
not-heavychevy Apr 20, 2025
3482bf9
added self.settings to BasketballDataset
not-heavychevy Apr 20, 2025
51a6657
added add_dummy_labels and add_graph_ids
not-heavychevy Apr 20, 2025
1352f80
rewritten tests for dataset.py
not-heavychevy Apr 21, 2025
b0fc5c1
Refactor BasketballPitchDimensions
not-heavychevy Apr 25, 2025
1e04bfd
added tests for BasketballPitchDimensions
not-heavychevy Apr 25, 2025
627fae8
Refactor BasketballGraphSettings
not-heavychevy Apr 25, 2025
1bdd740
added tests for BasketballGraphSettings
not-heavychevy Apr 25, 2025
7c64156
Merge PitchDimensions and GraphSettings
not-heavychevy Apr 25, 2025
a70739c
graph_settings test update
not-heavychevy Apr 25, 2025
ebe0914
import bugs fix
not-heavychevy Apr 25, 2025
2dcd3fb
graph_converter refactoring
not-heavychevy Apr 26, 2025
4b96024
dataset separator bugfix
not-heavychevy Apr 26, 2025
af3a02a
added tests for graph_converter
not-heavychevy Apr 26, 2025
8a47337
moved the functionality to “features”
not-heavychevy Apr 26, 2025
633afca
tests update
not-heavychevy Apr 26, 2025
7463b1e
tests fix
not-heavychevy Apr 26, 2025
dcfa8e4
Deprecate speed/acceleration thresholds
not-heavychevy Apr 26, 2025
1b5bc3b
unify data/settings access on DefaultDataset
not-heavychevy Apr 26, 2025
7eb2081
Refactor _convert to use polars methods
not-heavychevy Apr 26, 2025
b0b9d72
Add unified graph-export API to GraphConverter
not-heavychevy Apr 26, 2025
e55d30e
added new tests for public export API
not-heavychevy Apr 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
added BasketballDataset class
  • Loading branch information
not-heavychevy committed Apr 10, 2025
commit bb7d85486502125a4669236aa16970f00b5e1ad0
6 changes: 6 additions & 0 deletions unravel/basketball/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .dataset.dataset import BasketballDataset


# Names exported by a star-import of this package.
__all__ = ["BasketballDataset"]
90 changes: 90 additions & 0 deletions unravel/basketball/dataset/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import os
import json
import tempfile
import polars as pl
import requests

try:
import py7zr
except ImportError:
py7zr = None

class BasketballDataset:
    """
    Load NBA tracking data into a Polars DataFrame.

    Modes:
    - URL: a ``source`` starting with ``http://`` or ``https://`` is
      downloaded as a 7zip archive that is expected to contain a JSON file.
    - Local: ``source`` is either the path of an existing JSON file or a
      game identifier that is resolved to ``data/nba/<source>.json``.

    Two JSON layouts are understood:
    - a dict with ``"gameid"`` and ``"events"`` keys, where each event may
      carry a list of ``"moments"``;
    - a flat list of record dicts keyed by the output column names.
    """

    # Column order of the DataFrame produced by ``load()``.
    _COLUMNS = ("game_id", "frame_id", "team", "player", "x", "y")

    # Seconds ``requests`` waits for the server before giving up, so a stalled
    # connection cannot block ``load()`` forever.
    _DOWNLOAD_TIMEOUT = 60

    def __init__(self, source: str):
        """
        Args:
            source: URL of a 7zip archive, path to a JSON file, or a game
                identifier (see the class docstring).
        """
        self.source = source
        # Filled by ``load()``; ``None`` means "not loaded yet".
        self.data = None

    def load(self) -> pl.DataFrame:
        """
        Load and flatten the data into a Polars DataFrame.

        Returns:
            DataFrame with columns: game_id, frame_id, team, player, x, y.
            The same object is stored on ``self.data``.

        Raises:
            ImportError: URL mode was requested but ``py7zr`` is missing.
            Exception: the download did not return HTTP 200.
            FileNotFoundError: no JSON file could be located.
            ValueError: the JSON root is neither a dict nor a list.
        """
        if self.source.startswith(("http://", "https://")):
            json_data = self._read_json_from_url()
        else:
            json_data = self._read_json_from_disk()

        rows = self._build_rows(json_data)
        if rows:
            self.data = pl.DataFrame(rows)
        else:
            # Keep the documented columns even when nothing was parsed, so
            # downstream column selections do not fail on an empty frame.
            self.data = pl.DataFrame(schema=list(self._COLUMNS))
        return self.data

    def _read_json_from_url(self):
        """Download the 7zip archive at ``self.source`` and parse its JSON file."""
        if py7zr is None:
            raise ImportError("py7zr is required to extract 7zip archives.")
        response = requests.get(self.source, timeout=self._DOWNLOAD_TIMEOUT)
        if response.status_code != 200:
            raise Exception("Failed to download data from URL.")

        # One scratch directory holds both the archive and its extracted
        # content; it is removed on exit even if extraction or parsing fails.
        with tempfile.TemporaryDirectory() as work_dir:
            archive_path = os.path.join(work_dir, "download.7z")
            extract_path = os.path.join(work_dir, "extracted")
            os.makedirs(extract_path)
            with open(archive_path, "wb") as archive_file:
                archive_file.write(response.content)
            with py7zr.SevenZipFile(archive_path, mode="r") as archive:
                archive.extractall(path=extract_path)

            json_file = self._find_json_file(extract_path)
            if json_file is None:
                raise FileNotFoundError("JSON file not found in extracted archive.")
            with open(json_file, "r", encoding="utf-8") as jf:
                return json.load(jf)

    def _read_json_from_disk(self):
        """Parse ``self.source`` as a JSON path, or as a game id under data/nba."""
        if os.path.isfile(self.source):
            file_path = self.source
        else:
            file_path = os.path.join("data", "nba", f"{self.source}.json")
            if not os.path.isfile(file_path):
                raise FileNotFoundError(
                    f"Game file '{self.source}.json' not found at: {file_path}"
                )
        with open(file_path, "r", encoding="utf-8") as jf:
            return json.load(jf)

    @staticmethod
    def _find_json_file(root: str):
        """Return the first ``.json`` file below ``root`` (sorted walk), or None."""
        for dirpath, dirnames, filenames in os.walk(root):
            # Sorting makes the pick deterministic when several files match.
            dirnames.sort()
            for fname in sorted(filenames):
                if fname.endswith(".json"):
                    return os.path.join(dirpath, fname)
        return None

    @staticmethod
    def _build_rows(json_data) -> list:
        """Flatten a parsed JSON payload into a list of row dicts."""
        # The flat-list layout must be detected before any dict-style access:
        # calling ``.get`` on a list raises AttributeError.
        if isinstance(json_data, list):
            return [
                {
                    "game_id": rec.get("game_id", "unknown"),
                    "frame_id": rec.get("frame_id"),
                    "team": rec.get("team"),
                    "player": rec.get("player"),
                    "x": rec.get("x"),
                    "y": rec.get("y"),
                }
                for rec in json_data
            ]
        if not isinstance(json_data, dict):
            raise ValueError(
                "Unsupported JSON layout: expected an object or a list, "
                f"got {type(json_data).__name__}."
            )

        game_id = json_data.get("gameid", "unknown")
        rows = []
        for event in json_data.get("events", []):
            # NOTE(review): frame_id is the moment index inside its event, so
            # it restarts at 0 for every event and is not unique per game.
            for m_idx, moment in enumerate(event.get("moments") or []):
                if len(moment) < 6:
                    continue
                # The first entity is skipped (presumably the ball entry -
                # TODO confirm); the rest are read as [team, player, x, y, ...].
                for entity in moment[5][1:]:
                    if len(entity) < 4:
                        continue
                    rows.append({
                        "game_id": game_id,
                        "frame_id": m_idx,
                        "team": entity[0],
                        "player": entity[1],
                        "x": entity[2],
                        "y": entity[3],
                    })
        return rows

    def get_dataframe(self) -> pl.DataFrame:
        """
        Return the DataFrame produced by ``load()``.

        Raises:
            ValueError: ``load()`` has not been called yet.
        """
        if self.data is None:
            raise ValueError("Data not loaded. Call load() first.")
        return self.data