Sessions 5-7a: 955 tests, deployment ready
This commit is contained in:
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
NBA/WNBA referee enrichment.
|
||||
|
||||
Source: stats.nba.com unofficial endpoints. Crew assignments are typically
|
||||
published ~60-90 minutes before tip via the boxscoresummaryv2 endpoint.
|
||||
|
||||
This DIRECTLY affects kill conditions in the grading engine:
|
||||
- Crews calling more fouls than league average increase foul-trouble risk
|
||||
- Players w/ high foul rates + foul-heavy crews → kill condition activated
|
||||
|
||||
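We intentionally keep no in-process state in this module: results are stored
and looked up through app.utils.cache (cache_get / cache_set), and the
orchestrator may add its own caching on top.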
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
from app.utils.cache import cache_get, cache_set
|
||||
from app.config import NBA_API_TIMEOUT, SPLITS_TTL
|
||||
|
||||
# Request headers attached to every stats.nba.com call made by _safe_get.
_NBA_HEADERS = {
    # Browser-style identification and content negotiation.
    "User-Agent": "Mozilla/5.0 (compatible; VYNDR/1.0)",
    "Referer": "https://www.nba.com/",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "en-US,en;q=0.9",
    # Custom stats headers.
    # NOTE(review): presumably expected by the unofficial stats API — confirm.
    "x-nba-stats-origin": "stats",
    "x-nba-stats-token": "true",
}
|
||||
|
||||
# Shared root of the stats.nba.com endpoints defined below.
_STATS_API_ROOT = "https://stats.nba.com/stats"

# NOTE(review): _REF_STATS_URL is not referenced by the functions visible in
# this module — confirm whether another caller still needs it.
_REF_STATS_URL = f"{_STATS_API_ROOT}/officialgamefindergamelogs"
# Box-score summary endpoint; its "Officials" result set lists a game's crew.
_BOXSCORE_URL = f"{_STATS_API_ROOT}/boxscoresummaryv2"

# League IDs per stats.nba.com convention, keyed by lowercase league name.
LEAGUE_ID = dict(nba="00", wnba="10")
|
||||
|
||||
|
||||
def _safe_get(url: str, params: dict) -> Optional[dict]:
    """Fetch JSON from *url* with a single retry; return ``None`` on failure.

    stats.nba.com is flaky, so a failed first attempt (network error,
    non-200 status, or a body that cannot be decoded as JSON) is retried
    once after a short pause. Errors are swallowed on purpose: callers treat
    ``None`` as "no data" and degrade gracefully.

    Args:
        url: Endpoint to request.
        params: Query-string parameters forwarded to ``requests.get``.

    Returns:
        The decoded JSON body of the first HTTP 200 response, or ``None``
        when both attempts fail.
    """
    for attempt in (0, 1):
        try:
            resp = requests.get(
                url,
                headers=_NBA_HEADERS,
                params=params,
                timeout=NBA_API_TIMEOUT,
            )
            if resp.status_code == 200:
                return resp.json()
        except (requests.RequestException, ValueError):
            # ValueError covers a 200 response whose body is not valid JSON:
            # older requests releases raise a plain ValueError subclass from
            # resp.json(), which RequestException alone would let escape.
            pass
        if attempt == 0:
            # Brief back-off before the one and only retry.
            time.sleep(1.5)
    return None
|
||||
|
||||
|
||||
def get_tonight_officials(game_id: str) -> dict:
    """Return the officiating crew assigned to a single game.

    An empty ``officials`` list means assignments haven't been published yet
    (normal until ~90 min before tip).

    Args:
        game_id: Game identifier; must be non-empty and alphanumeric.

    Returns:
        A dict with ``game_id``, ``source`` and ``officials`` (a list of
        ``{"official_id", "name", "jersey_num"}`` dicts). When the upstream
        call yields nothing usable, the dict also carries ``note: "no data"``
        and is not cached. For an invalid ``game_id`` the result is
        ``{"error": "invalid game_id", "officials": []}``.
    """
    if not game_id or not str(game_id).isalnum():
        return {"error": "invalid game_id", "officials": []}

    cache_key = f"refs:officials:{game_id}"
    cached = cache_get(cache_key)
    if cached is not None:
        return cached

    data = _safe_get(_BOXSCORE_URL, {"GameID": game_id})
    if not data or "resultSets" not in data:
        return {
            "officials": [],
            "game_id": game_id,
            "source": "stats.nba.com",
            "note": "no data",
        }

    officials = []
    # `or []` guards against a present-but-null "resultSets" value.
    for rs in data.get("resultSets") or []:
        if rs.get("name") != "Officials":
            continue
        headers = rs.get("headers") or []
        for row in rs.get("rowSet") or []:
            record = dict(zip(headers, row))
            first = record.get("FIRST_NAME", "") or ""
            last = record.get("LAST_NAME", "") or ""
            officials.append({
                "official_id": record.get("OFFICIAL_ID"),
                "name": f"{first} {last}".strip(),
                "jersey_num": record.get("JERSEY_NUM"),
            })
        # Only the first "Officials" result set is consumed.
        break

    result = {
        "game_id": game_id,
        "officials": officials,
        "source": "stats.nba.com",
    }
    # A published crew doesn't change, so it can live for the full TTL. An
    # empty crew means "not published yet"; caching that for the full TTL
    # would hide the assignment once it appears, so keep it only briefly.
    cache_set(cache_key, result, ttl=SPLITS_TTL if officials else 300)
    return result
|
||||
|
||||
|
||||
def get_referee_tendencies(season: str, league: str = "nba") -> dict:
    """Aggregate per-referee tendencies for a season.

    Consumers can classify 'tight', 'average', 'generous' crews from the
    quartile bands of the returned list.

    NOTE: stats.nba.com's referee dashboard endpoint changes shape every few
    years. If the upstream returns nothing, the orchestrator should fall
    back to last season's cached data.

    Args:
        season: Season identifier, passed through as the upstream ``Season``
            parameter and used in the cache key.
        league: Key of ``LEAGUE_ID`` ("nba" or "wnba").

    Returns:
        A dict with ``season``, ``league`` and:

        * ``referees`` — list of ``{"name", "games", "pf_per_game",
          "tech_per_game", "off_foul_per_game"}`` dicts, sorted by
          ``pf_per_game`` from highest to lowest (missing values sort as 0).
        * ``league_avg_pf_per_game`` — unweighted mean of the positive
          ``pf_per_game`` values, or ``None`` when there are none.

        On success ``source`` is included; when the upstream is unavailable
        ``referees`` is empty and a ``note`` explains why. For an unknown
        league the result is ``{"error": "invalid league", "referees": []}``.
    """
    if league not in LEAGUE_ID:
        return {"error": "invalid league", "referees": []}

    cache_key = f"refs:tendencies:{league}:{season}"
    cached = cache_get(cache_key)
    if cached is not None:
        return cached

    # The upstream endpoint moved around 2024. Only the current URL is
    # requested; if it fails we degrade gracefully — the rest of the pipeline
    # can use league_avg alone to back off the foul-trouble kill condition
    # modifier.
    params = {
        "Season": season,
        "SeasonType": "Regular Season",
        "LeagueID": LEAGUE_ID[league],
        "PerMode": "PerGame",
    }
    data = _safe_get("https://stats.nba.com/stats/leaguedashrefstats", params)
    if not data or not data.get("resultSets"):
        result = {
            "referees": [],
            "league_avg_pf_per_game": None,
            "season": season,
            "league": league,
            "note": "upstream referee dashboard unavailable",
        }
        # Short cache so we retry sooner.
        cache_set(cache_key, result, ttl=300)
        return result

    rs = data["resultSets"][0]
    headers = rs.get("headers") or []
    refs = []
    for row in rs.get("rowSet") or []:
        record = dict(zip(headers, row))
        refs.append({
            "name": record.get("REFEREE_NAME", ""),
            "games": record.get("GP", 0),
            "pf_per_game": record.get("PF", 0),
            "tech_per_game": record.get("TECH", 0),
            "off_foul_per_game": record.get("OFF_FOUL", 0),
        })

    # Deliver the ordering the contract promises: highest foul rate first.
    # list.sort is stable, so ties keep their upstream order.
    refs.sort(key=lambda ref: ref["pf_per_game"] or 0, reverse=True)

    # Referees with a missing or zero PF value are excluded from the average.
    pf_values = [ref["pf_per_game"] for ref in refs if (ref["pf_per_game"] or 0) > 0]
    league_avg = (sum(pf_values) / len(pf_values)) if pf_values else None

    result = {
        "referees": refs,
        "league_avg_pf_per_game": league_avg,
        "season": season,
        "league": league,
        "source": "stats.nba.com",
    }
    cache_set(cache_key, result, ttl=SPLITS_TTL)
    return result
|
||||
Reference in New Issue
Block a user