Sessions 5-7a: 955 tests, deployment ready

2026-06-08 18:35:13 -04:00
parent 06b82624a2
commit 1fa04dc776
371 changed files with 49366 additions and 955 deletions
@@ -0,0 +1,160 @@
+"""
+NBA/WNBA referee enrichment.
+
+Source: stats.nba.com unofficial endpoints. Crew assignments are typically
+published ~60-90 minutes before tip via the boxscoresummaryv2 endpoint.
+
+This DIRECTLY affects kill conditions in the grading engine:
+- Crews calling more fouls than league average increase foul-trouble risk
+- Players w/ high foul rates + foul-heavy crews → kill condition activated
+
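+We intentionally keep no in-process state in this module; results are stored
+through app.utils.cache (cache_get / cache_set), and the orchestrator caches
+results on top of that.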
+"""
+from __future__ import annotations
+
+import time
+from typing import Optional
+
+import requests
+
+from app.utils.cache import cache_get, cache_set
+from app.config import NBA_API_TIMEOUT, SPLITS_TTL
+
+# Request headers sent with every stats.nba.com call made through _safe_get.
+# NOTE(review): the Referer and x-nba-stats-* entries presumably make the
+# request resemble one issued by the nba.com site — confirm they are still
+# required by the upstream.
+_NBA_HEADERS = {
+    "User-Agent": "Mozilla/5.0 (compatible; VYNDR/1.0)",
+    "Referer": "https://www.nba.com/",
+    "Accept": "application/json, text/plain, */*",
+    "Accept-Language": "en-US,en;q=0.9",
+    "x-nba-stats-origin": "stats",
+    "x-nba-stats-token": "true",
+}
+
+# NOTE(review): _REF_STATS_URL is not referenced by the code visible in this
+# module (get_referee_tendencies queries a different, inline URL) — confirm
+# whether it is still needed.
+_REF_STATS_URL = "https://stats.nba.com/stats/officialgamefindergamelogs"
+# Box score summary endpoint; get_tonight_officials reads its "Officials"
+# result set.
+_BOXSCORE_URL = "https://stats.nba.com/stats/boxscoresummaryv2"
+
+# League IDs per stats.nba.com convention
+LEAGUE_ID = {"nba": "00", "wnba": "10"}
+
+
+def _safe_get(url: str, params: dict) -> Optional[dict]:
+    """Resilient GET with a single retry. stats.nba.com is flaky."""
+    for attempt in (0, 1):
+        try:
+            resp = requests.get(url, headers=_NBA_HEADERS, params=params, timeout=NBA_API_TIMEOUT)
+            if resp.status_code == 200:
+                return resp.json()
+        except requests.RequestException:
+            pass
+        if attempt == 0:
+            time.sleep(1.5)
+    return None
+
+
+def get_tonight_officials(game_id: str) -> dict:
+    """
+    Return the crew assigned to a single game. Empty list means assignments
+    haven't been published yet (normal until ~90 min before tip).
+    """
+    if not game_id or not str(game_id).isalnum():
+        return {"error": "invalid game_id", "officials": []}
+
+    cache_key = f"refs:officials:{game_id}"
+    cached = cache_get(cache_key)
+    if cached is not None:
+        return cached
+
+    data = _safe_get(_BOXSCORE_URL, {"GameID": game_id})
+    if not data or "resultSets" not in data:
+        return {"officials": [], "game_id": game_id, "source": "stats.nba.com", "note": "no data"}
+
+    officials = []
+    for rs in data.get("resultSets", []):
+        if rs.get("name") != "Officials":
+            continue
+        headers = rs.get("headers") or []
+        for row in rs.get("rowSet") or []:
+            record = dict(zip(headers, row))
+            first = record.get("FIRST_NAME", "") or ""
+            last = record.get("LAST_NAME", "") or ""
+            officials.append({
+                "official_id": record.get("OFFICIAL_ID"),
+                "name": f"{first} {last}".strip(),
+                "jersey_num": record.get("JERSEY_NUM"),
+            })
+        break
+
+    result = {
+        "game_id": game_id,
+        "officials": officials,
+        "source": "stats.nba.com",
+    }
+    # Officials assignments don't change once published, but TTL keeps the cache fresh.
+    cache_set(cache_key, result, ttl=SPLITS_TTL)
+    return result
+
+
+def get_referee_tendencies(season: str, league: str = "nba") -> dict:
+    """
+    Aggregate per-referee tendencies for the season. Returns league_avg_pf
+    and a sorted list of refs by personal-foul rate; consumers can classify
+    'tight', 'average', 'generous' crews from the quartile bands.
+
+    NOTE: stats.nba.com's referee dashboard endpoint changes shape every few
+    years. If the upstream returns nothing, the orchestrator should fall
+    back to last season's cached data.
+    """
+    if league not in LEAGUE_ID:
+        return {"error": "invalid league", "referees": []}
+
+    cache_key = f"refs:tendencies:{league}:{season}"
+    cached = cache_get(cache_key)
+    if cached is not None:
+        return cached
+
+    # The upstream endpoint moved around 2024. We try the modern URL first
+    # and degrade gracefully — the rest of the pipeline can use league_avg
+    # alone to back off the foul-trouble kill condition modifier.
+    params = {
+        "Season": season,
+        "SeasonType": "Regular Season",
+        "LeagueID": LEAGUE_ID[league],
+        "PerMode": "PerGame",
+    }
+    data = _safe_get("https://stats.nba.com/stats/leaguedashrefstats", params)
+    if not data or not data.get("resultSets"):
+        result = {
+            "referees": [],
+            "league_avg_pf_per_game": None,
+            "season": season,
+            "league": league,
+            "note": "upstream referee dashboard unavailable",
+        }
+        # Short cache so we retry sooner.
+        cache_set(cache_key, result, ttl=300)
+        return result
+
+    rs = data["resultSets"][0]
+    headers = rs.get("headers") or []
+    refs = []
+    for row in rs.get("rowSet") or []:
+        record = dict(zip(headers, row))
+        refs.append({
+            "name": record.get("REFEREE_NAME", ""),
+            "games": record.get("GP", 0),
+            "pf_per_game": record.get("PF", 0),
+            "tech_per_game": record.get("TECH", 0),
+            "off_foul_per_game": record.get("OFF_FOUL", 0),
+        })
+
+    pf_values = [r["pf_per_game"] or 0 for r in refs if (r.get("pf_per_game") or 0) > 0]
+    league_avg = (sum(pf_values) / len(pf_values)) if pf_values else None
+
+    result = {
+        "referees": refs,
+        "league_avg_pf_per_game": league_avg,
+        "season": season,
+        "league": league,
+        "source": "stats.nba.com",
+    }
+    cache_set(cache_key, result, ttl=SPLITS_TTL)
+    return result