""" NBA/WNBA referee enrichment. Source: stats.nba.com unofficial endpoints. Crew assignments are typically published ~60-90 minutes before tip via the boxscoresummaryv2 endpoint. This DIRECTLY affects kill conditions in the grading engine: - Crews calling more fouls than league average increase foul-trouble risk - Players w/ high foul rates + foul-heavy crews → kill condition activated We intentionally keep this stateless; the orchestrator caches results. """ from __future__ import annotations import time from typing import Optional import requests from app.utils.cache import cache_get, cache_set from app.config import NBA_API_TIMEOUT, SPLITS_TTL _NBA_HEADERS = { "User-Agent": "Mozilla/5.0 (compatible; VYNDR/1.0)", "Referer": "https://www.nba.com/", "Accept": "application/json, text/plain, */*", "Accept-Language": "en-US,en;q=0.9", "x-nba-stats-origin": "stats", "x-nba-stats-token": "true", } _REF_STATS_URL = "https://stats.nba.com/stats/officialgamefindergamelogs" _BOXSCORE_URL = "https://stats.nba.com/stats/boxscoresummaryv2" # League IDs per stats.nba.com convention LEAGUE_ID = {"nba": "00", "wnba": "10"} def _safe_get(url: str, params: dict) -> Optional[dict]: """Resilient GET with a single retry. stats.nba.com is flaky.""" for attempt in (0, 1): try: resp = requests.get(url, headers=_NBA_HEADERS, params=params, timeout=NBA_API_TIMEOUT) if resp.status_code == 200: return resp.json() except requests.RequestException: pass if attempt == 0: time.sleep(1.5) return None def get_tonight_officials(game_id: str) -> dict: """ Return the crew assigned to a single game. Empty list means assignments haven't been published yet (normal until ~90 min before tip). """ if not game_id or not str(game_id).isalnum(): return {"error": "invalid game_id", "officials": []} cache_key = f"refs:officials:{game_id}" cached = cache_get(cache_key) if cached is not None: return cached data = _safe_get(_BOXSCORE_URL, {"GameID": game_id}) if not data or "resultSets" not in data: return {"officials": [], "game_id": game_id, "source": "stats.nba.com", "note": "no data"} officials = [] for rs in data.get("resultSets", []): if rs.get("name") != "Officials": continue headers = rs.get("headers") or [] for row in rs.get("rowSet") or []: record = dict(zip(headers, row)) first = record.get("FIRST_NAME", "") or "" last = record.get("LAST_NAME", "") or "" officials.append({ "official_id": record.get("OFFICIAL_ID"), "name": f"{first} {last}".strip(), "jersey_num": record.get("JERSEY_NUM"), }) break result = { "game_id": game_id, "officials": officials, "source": "stats.nba.com", } # Officials assignments don't change once published, but TTL keeps the cache fresh. cache_set(cache_key, result, ttl=SPLITS_TTL) return result def get_referee_tendencies(season: str, league: str = "nba") -> dict: """ Aggregate per-referee tendencies for the season. Returns league_avg_pf and a sorted list of refs by personal-foul rate; consumers can classify 'tight', 'average', 'generous' crews from the quartile bands. NOTE: stats.nba.com's referee dashboard endpoint changes shape every few years. If the upstream returns nothing, the orchestrator should fall back to last season's cached data. """ if league not in LEAGUE_ID: return {"error": "invalid league", "referees": []} cache_key = f"refs:tendencies:{league}:{season}" cached = cache_get(cache_key) if cached is not None: return cached # The upstream endpoint moved around 2024. We try the modern URL first # and degrade gracefully — the rest of the pipeline can use league_avg # alone to back off the foul-trouble kill condition modifier. params = { "Season": season, "SeasonType": "Regular Season", "LeagueID": LEAGUE_ID[league], "PerMode": "PerGame", } data = _safe_get("https://stats.nba.com/stats/leaguedashrefstats", params) if not data or not data.get("resultSets"): result = { "referees": [], "league_avg_pf_per_game": None, "season": season, "league": league, "note": "upstream referee dashboard unavailable", } # Short cache so we retry sooner. cache_set(cache_key, result, ttl=300) return result rs = data["resultSets"][0] headers = rs.get("headers") or [] refs = [] for row in rs.get("rowSet") or []: record = dict(zip(headers, row)) refs.append({ "name": record.get("REFEREE_NAME", ""), "games": record.get("GP", 0), "pf_per_game": record.get("PF", 0), "tech_per_game": record.get("TECH", 0), "off_foul_per_game": record.get("OFF_FOUL", 0), }) pf_values = [r["pf_per_game"] or 0 for r in refs if (r.get("pf_per_game") or 0) > 0] league_avg = (sum(pf_values) / len(pf_values)) if pf_values else None result = { "referees": refs, "league_avg_pf_per_game": league_avg, "season": season, "league": league, "source": "stats.nba.com", } cache_set(cache_key, result, ttl=SPLITS_TTL) return result