161 lines
5.4 KiB
Python
161 lines
5.4 KiB
Python
"""
|
|
NBA/WNBA referee enrichment.

Source: stats.nba.com unofficial endpoints. Crew assignments are typically
published ~60-90 minutes before tip via the boxscoresummaryv2 endpoint.

This DIRECTLY affects kill conditions in the grading engine:
- Crews calling more fouls than league average increase foul-trouble risk
- Players with high foul rates + foul-heavy crews → kill condition activated

We intentionally keep no state in this module: lookups go through the shared
cache helpers (cache_get / cache_set), and the orchestrator caches results.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import time
|
|
from typing import Optional
|
|
|
|
import requests
|
|
|
|
from app.utils.cache import cache_get, cache_set
|
|
from app.config import NBA_API_TIMEOUT, SPLITS_TTL
|
|
|
|
# Headers attached to every request this module sends through _safe_get.
_NBA_HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; VYNDR/1.0)",
    "Referer": "https://www.nba.com/",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "en-US,en;q=0.9",
    "x-nba-stats-origin": "stats",
    "x-nba-stats-token": "true",
}

# stats.nba.com endpoints.
_REF_STATS_URL = "https://stats.nba.com/stats/officialgamefindergamelogs"
_BOXSCORE_URL = "https://stats.nba.com/stats/boxscoresummaryv2"

# League IDs per stats.nba.com convention
LEAGUE_ID = dict(nba="00", wnba="10")
|
|
|
|
|
|
def _safe_get(url: str, params: dict) -> Optional[dict]:
    """Resilient GET with a single retry. stats.nba.com is flaky.

    The request is attempted at most twice, with a 1.5 second pause after a
    failed first attempt.

    Args:
        url: Endpoint to request.
        params: Query-string parameters for the request.

    Returns:
        The decoded JSON body of the first HTTP 200 response, or ``None`` when
        both attempts fail (request error, non-200 status, or a body that
        cannot be decoded as JSON).
    """
    for attempt in (0, 1):
        try:
            resp = requests.get(
                url,
                headers=_NBA_HEADERS,
                params=params,
                timeout=NBA_API_TIMEOUT,
            )
            if resp.status_code == 200:
                return resp.json()
        except (requests.RequestException, ValueError):
            # Deliberate best-effort: swallow and retry / fall through to None.
            # ValueError covers a 200 response whose body is not valid JSON on
            # requests versions where the decode error is not a
            # RequestException subclass.
            pass
        if attempt == 0:
            # Brief back-off before the single retry.
            time.sleep(1.5)
    return None
|
|
|
|
|
|
def get_tonight_officials(game_id: str) -> dict:
    """
    Return the crew assigned to a single game.

    An empty ``officials`` list means assignments haven't been published yet
    (normal until ~90 min before tip).

    Args:
        game_id: Game identifier sent upstream as ``GameID``. Must be
            non-empty and alphanumeric.

    Returns:
        One of:
        - ``{"error": "invalid game_id", "officials": []}`` when ``game_id``
          fails validation (nothing is fetched or cached);
        - ``{"officials": [], "game_id", "source", "note": "no data"}`` when
          the upstream response is missing or has no ``resultSets`` (not
          cached);
        - ``{"game_id", "officials", "source"}`` otherwise, where each
          official is ``{"official_id", "name", "jersey_num"}``.
    """
    if not game_id or not str(game_id).isalnum():
        return {"error": "invalid game_id", "officials": []}

    cache_key = f"refs:officials:{game_id}"
    cached = cache_get(cache_key)
    if cached is not None:
        return cached

    data = _safe_get(_BOXSCORE_URL, {"GameID": game_id})
    if not data or "resultSets" not in data:
        return {"officials": [], "game_id": game_id, "source": "stats.nba.com", "note": "no data"}

    officials = []
    # `or []` also tolerates an explicit `"resultSets": null` in the payload.
    for rs in data.get("resultSets") or []:
        if rs.get("name") != "Officials":
            continue
        headers = rs.get("headers") or []
        for row in rs.get("rowSet") or []:
            record = dict(zip(headers, row))
            first = record.get("FIRST_NAME", "") or ""
            last = record.get("LAST_NAME", "") or ""
            officials.append({
                "official_id": record.get("OFFICIAL_ID"),
                "name": f"{first} {last}".strip(),
                "jersey_num": record.get("JERSEY_NUM"),
            })
        # Only the first "Officials" result set is used.
        break

    result = {
        "game_id": game_id,
        "officials": officials,
        "source": "stats.nba.com",
    }
    # Officials assignments don't change once published, so a populated crew
    # gets the full TTL. An empty crew means "not published yet": cache it
    # only briefly (same 300 s used for the empty referee-tendencies result)
    # so the crew is picked up soon after it is posted.
    cache_set(cache_key, result, ttl=SPLITS_TTL if officials else 300)
    return result
|
|
|
|
|
|
def get_referee_tendencies(season: str, league: str = "nba") -> dict:
    """
    Aggregate per-referee tendencies for the season.

    Returns league_avg_pf_per_game and a list of refs sorted by personal-foul
    rate (highest first); consumers can classify 'tight', 'average',
    'generous' crews from the quartile bands.

    NOTE: stats.nba.com's referee dashboard endpoint changes shape every few
    years. If the upstream returns nothing, the orchestrator should fall
    back to last season's cached data.

    Args:
        season: Season value forwarded upstream as ``Season`` (not validated
            here).
        league: A key of ``LEAGUE_ID``.

    Returns:
        One of:
        - ``{"error": "invalid league", "referees": []}`` for an unknown
          league (nothing is fetched or cached);
        - a dict with empty ``referees``, ``league_avg_pf_per_game`` of
          ``None`` and a ``note`` when the upstream is unavailable (cached
          for 300 s);
        - ``{"referees", "league_avg_pf_per_game", "season", "league",
          "source"}`` otherwise. ``league_avg_pf_per_game`` is the unweighted
          mean of the positive ``pf_per_game`` values, or ``None`` if there
          are none.
    """
    if league not in LEAGUE_ID:
        return {"error": "invalid league", "referees": []}

    cache_key = f"refs:tendencies:{league}:{season}"
    cached = cache_get(cache_key)
    if cached is not None:
        return cached

    # The upstream endpoint moved around 2024. Only this URL is attempted;
    # on failure we degrade gracefully — the rest of the pipeline can use
    # league_avg alone to back off the foul-trouble kill condition modifier.
    params = {
        "Season": season,
        "SeasonType": "Regular Season",
        "LeagueID": LEAGUE_ID[league],
        "PerMode": "PerGame",
    }
    data = _safe_get("https://stats.nba.com/stats/leaguedashrefstats", params)
    if not data or not data.get("resultSets"):
        result = {
            "referees": [],
            "league_avg_pf_per_game": None,
            "season": season,
            "league": league,
            "note": "upstream referee dashboard unavailable",
        }
        # Short cache so we retry sooner.
        cache_set(cache_key, result, ttl=300)
        return result

    rs = data["resultSets"][0]
    headers = rs.get("headers") or []
    refs = []
    for row in rs.get("rowSet") or []:
        record = dict(zip(headers, row))
        refs.append({
            "name": record.get("REFEREE_NAME", ""),
            "games": record.get("GP", 0),
            "pf_per_game": record.get("PF", 0),
            "tech_per_game": record.get("TECH", 0),
            "off_foul_per_game": record.get("OFF_FOUL", 0),
        })

    # Average over referees with a positive foul rate only. Computed before
    # sorting so the summation order matches the upstream row order.
    pf_values = [r["pf_per_game"] or 0 for r in refs if (r.get("pf_per_game") or 0) > 0]
    league_avg = (sum(pf_values) / len(pf_values)) if pf_values else None

    # Honor the documented ordering: most foul-heavy referees first. A
    # missing (None) rate sorts as 0; ties keep upstream order (stable sort).
    refs.sort(key=lambda r: r["pf_per_game"] or 0, reverse=True)

    result = {
        "referees": refs,
        "league_avg_pf_per_game": league_avg,
        "season": season,
        "league": league,
        "source": "stats.nba.com",
    }
    cache_set(cache_key, result, ttl=SPLITS_TTL)
    return result
|