Sessions 5-7a: 955 tests, deployment ready

This commit is contained in:
Kev
2026-06-08 18:35:13 -04:00
parent 06b82624a2
commit 1fa04dc776
371 changed files with 49366 additions and 955 deletions
+160
View File
@@ -0,0 +1,160 @@
"""
NBA/WNBA referee enrichment.
Source: stats.nba.com unofficial endpoints. Crew assignments are typically
published ~60-90 minutes before tip via the boxscoresummaryv2 endpoint.
This DIRECTLY affects kill conditions in the grading engine:
- Crews calling more fouls than league average increase foul-trouble risk
- Players w/ high foul rates + foul-heavy crews → kill condition activated
We intentionally keep no in-process state here: results are stored through the
shared cache helpers (cache_get / cache_set), and the orchestrator may cache on top.
"""
from __future__ import annotations
import time
from typing import Optional
import requests
from app.utils.cache import cache_get, cache_set
from app.config import NBA_API_TIMEOUT, SPLITS_TTL
# Headers sent with every stats.nba.com request issued by this module.
_NBA_HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; VYNDR/1.0)",
    "Referer": "https://www.nba.com/",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "en-US,en;q=0.9",
    "x-nba-stats-origin": "stats",
    "x-nba-stats-token": "true",
}

# Endpoint URLs on stats.nba.com.
_REF_STATS_URL = "https://stats.nba.com/stats/officialgamefindergamelogs"
_BOXSCORE_URL = "https://stats.nba.com/stats/boxscoresummaryv2"

# League IDs per stats.nba.com convention
LEAGUE_ID = {
    "nba": "00",
    "wnba": "10",
}
def _safe_get(url: str, params: dict) -> Optional[dict]:
    """
    Resilient GET with a single retry. stats.nba.com is flaky.

    Sends the request with the module's ``_NBA_HEADERS`` and
    ``NBA_API_TIMEOUT``. An attempt succeeds only when the response status is
    200 and the body decodes to a JSON object (dict). Any other outcome —
    transport error, non-200 status, undecodable body, non-dict payload —
    counts as a failed attempt.

    Args:
        url: Endpoint to request.
        params: Query-string parameters.

    Returns:
        The decoded JSON object, or ``None`` if both attempts failed.
    """
    for attempt in (0, 1):
        try:
            resp = requests.get(
                url,
                headers=_NBA_HEADERS,
                params=params,
                timeout=NBA_API_TIMEOUT,
            )
            if resp.status_code == 200:
                payload = resp.json()
                # Callers treat the result as a mapping; anything else is
                # handled as a failed attempt rather than handed back.
                if isinstance(payload, dict):
                    return payload
        except (requests.RequestException, ValueError):
            # ValueError: a 200 response whose body is not valid JSON. Older
            # requests releases raise a plain ValueError subclass here that
            # is not a RequestException.
            pass
        if attempt == 0:
            # Brief pause before the one and only retry.
            time.sleep(1.5)
    return None
def get_tonight_officials(game_id: str) -> dict:
    """
    Return the crew assigned to a single game.

    An empty ``officials`` list means assignments haven't been published yet
    (normal until ~90 min before tip).

    Args:
        game_id: Game identifier; must be non-empty and alphanumeric.

    Returns:
        A dict with an ``officials`` list of
        ``{"official_id", "name", "jersey_num"}`` entries, plus ``game_id``
        and ``source``. For an invalid ``game_id`` the dict carries ``error``
        and an empty ``officials`` list instead. When the upstream call yields
        nothing usable, the dict carries ``note: "no data"`` and is not cached.
    """
    if not game_id or not str(game_id).isalnum():
        return {"error": "invalid game_id", "officials": []}

    cache_key = f"refs:officials:{game_id}"
    cached = cache_get(cache_key)
    if cached is not None:
        return cached

    data = _safe_get(_BOXSCORE_URL, {"GameID": game_id})
    if not data or "resultSets" not in data:
        return {"officials": [], "game_id": game_id, "source": "stats.nba.com", "note": "no data"}

    officials = []
    # `or []` guards against a present-but-null "resultSets" value.
    for rs in data.get("resultSets") or []:
        if rs.get("name") != "Officials":
            continue
        headers = rs.get("headers") or []
        for row in rs.get("rowSet") or []:
            record = dict(zip(headers, row))
            first = record.get("FIRST_NAME", "") or ""
            last = record.get("LAST_NAME", "") or ""
            officials.append({
                "official_id": record.get("OFFICIAL_ID"),
                "name": f"{first} {last}".strip(),
                "jersey_num": record.get("JERSEY_NUM"),
            })
        # Only the first result set named "Officials" is read.
        break

    result = {
        "game_id": game_id,
        "officials": officials,
        "source": "stats.nba.com",
    }
    # A populated crew gets the long TTL. An empty crew means "not published
    # yet", so it is cached only briefly (same 300 s used for empty referee
    # tendencies) to avoid hiding the crew once it does get published.
    cache_set(cache_key, result, ttl=SPLITS_TTL if officials else 300)
    return result
def get_referee_tendencies(season: str, league: str = "nba") -> dict:
    """
    Aggregate per-referee tendencies for the season.

    Returns ``league_avg_pf_per_game`` and the list of referees sorted by
    personal-foul rate, highest first; consumers can classify 'tight',
    'average', 'generous' crews from the quartile bands.

    NOTE: stats.nba.com's referee dashboard endpoint changes shape every few
    years. If the upstream returns nothing, the orchestrator should fall
    back to last season's cached data.

    Args:
        season: Season string, passed through as the upstream ``Season``
            parameter and used in the cache key.
        league: Key of ``LEAGUE_ID`` ("nba" or "wnba").

    Returns:
        A dict with ``referees``, ``league_avg_pf_per_game`` (``None`` when no
        referee has a positive numeric PF value), ``season`` and ``league``.
        On success it also carries ``source``; when upstream is unavailable it
        carries ``note`` instead. An unknown ``league`` yields
        ``{"error": "invalid league", "referees": []}``.
    """
    if league not in LEAGUE_ID:
        return {"error": "invalid league", "referees": []}

    cache_key = f"refs:tendencies:{league}:{season}"
    cached = cache_get(cache_key)
    if cached is not None:
        return cached

    # The upstream endpoint moved around 2024. We try the modern URL first
    # and degrade gracefully — the rest of the pipeline can use league_avg
    # alone to back off the foul-trouble kill condition modifier.
    params = {
        "Season": season,
        "SeasonType": "Regular Season",
        "LeagueID": LEAGUE_ID[league],
        "PerMode": "PerGame",
    }
    data = _safe_get("https://stats.nba.com/stats/leaguedashrefstats", params)

    # Accept only the expected shape: a non-empty list whose first element is
    # a mapping. Anything else is treated the same as "no data".
    result_sets = data.get("resultSets") if isinstance(data, dict) else None
    first_set = result_sets[0] if isinstance(result_sets, list) and result_sets else None
    if not isinstance(first_set, dict):
        result = {
            "referees": [],
            "league_avg_pf_per_game": None,
            "season": season,
            "league": league,
            "note": "upstream referee dashboard unavailable",
        }
        # Short cache so we retry sooner.
        cache_set(cache_key, result, ttl=300)
        return result

    headers = first_set.get("headers") or []
    refs = []
    for row in first_set.get("rowSet") or []:
        record = dict(zip(headers, row))
        refs.append({
            "name": record.get("REFEREE_NAME", ""),
            "games": record.get("GP", 0),
            "pf_per_game": record.get("PF", 0),
            "tech_per_game": record.get("TECH", 0),
            "off_foul_per_game": record.get("OFF_FOUL", 0),
        })

    def _pf(ref: dict) -> float:
        """Numeric PF rate for a referee entry; 0 when missing or non-numeric."""
        value = ref.get("pf_per_game")
        return value if isinstance(value, (int, float)) else 0

    # Deliver the ordering the docstring promises: highest foul rate first.
    # list.sort is stable, so ties keep their upstream order.
    refs.sort(key=_pf, reverse=True)

    # Unweighted mean over referees with a positive PF value.
    pf_values = [_pf(ref) for ref in refs if _pf(ref) > 0]
    league_avg = (sum(pf_values) / len(pf_values)) if pf_values else None

    result = {
        "referees": refs,
        "league_avg_pf_per_game": league_avg,
        "season": season,
        "league": league,
        "source": "stats.nba.com",
    }
    cache_set(cache_key, result, ttl=SPLITS_TTL)
    return result