#!/usr/bin/env node
/**
 * Populate player_id_map with ESPN + (where applicable) MLB Stats API IDs.
 *
 *   node scripts/populate-player-ids.js            # all active sports, prompts
 *   node scripts/populate-player-ids.js nba        # single sport
 *   node scripts/populate-player-ids.js --dry-run  # no DB writes
 *   node scripts/populate-player-ids.js --yes      # skip confirmation
 *
 * For each sport this script walks ESPN's team list, then each roster, and
 * upserts every player. MLB additionally name-matches to MLB Stats API for
 * the mlbam_id (so Statcast lookups can find the player by ID, not name).
 *
 * Failure semantics: log + continue. A 4xx on one team doesn't kill the
 * batch. End-of-run summary prints captured / skipped / errored counts.
 */

if (require.main !== module) {
  throw new Error('Run directly: node scripts/populate-player-ids.js');
}

const path = require('path');
require('dotenv').config({ path: path.join(__dirname, '..', '.env') });

const axios = require('axios');
const readline = require('readline');
const { getSupabaseServiceClient } = require('../src/utils/supabase');
const { getActiveSports, getSportConfig } = require('../src/config/sports');

// CLI parsing: flags start with `--`; the first bare argument is the sport key.
const args = process.argv.slice(2);
const dryRun = args.includes('--dry-run');
const skipConfirm = args.includes('--yes');
const explicitSport = args.find((a) => !a.startsWith('--'));

const ESPN_TEAMS_BASE = 'https://site.api.espn.com/apis/site/v2/sports';
// Pause inserted after every ESPN request issued by processSport.
const ESPN_THROTTLE_MS = 600;
const MLB_PEOPLE_BASE = 'https://statsapi.mlb.com/api/v1/sports/1/players';

// Sport key -> path segment used under ESPN_TEAMS_BASE.
const espnSportPath = {
  nba: 'basketball/nba',
  wnba: 'basketball/wnba',
  ncaab: 'basketball/mens-college-basketball',
  mlb: 'baseball/mlb',
  nfl: 'football/nfl',
  ncaafb: 'football/college-football',
  nhl: 'hockey/nhl',
};

/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((r) => setTimeout(r, ms));
}

/**
 * Reduce a display name to a comparison key: accents stripped, lower-cased,
 * generational suffixes removed, punctuation turned into spaces, whitespace
 * collapsed and trimmed.
 * @param {string} name
 * @returns {string} normalized key, or '' for a falsy input
 */
function normalizeName(name) {
  if (!name) return '';
  return name
    .normalize('NFD')
    // Combining diacritical marks, written as escapes so the pattern survives
    // re-encoding of this file.
    .replace(/[\u0300-\u036f]/g, '') // strip accents
    .toLowerCase()
    .replace(/\b(jr|sr|ii|iii|iv|v)\.?\b/g, '') // suffixes
    .replace(/[^a-z0-9\s]/g, ' ') // punctuation
    .replace(/\s+/g, ' ') // collapse spaces
    .trim();
}

/**
 * GET `url` with a 15 s timeout and return the response body.
 * @param {string} url
 * @param {{ params?: object }} [options] query parameters passed to axios
 * @returns {Promise<any>} response data; axios errors propagate to the caller
 */
async function fetchJSON(url, { params } = {}) {
  const res = await axios.get(url, { params, timeout: 15_000 });
  return res.data;
}

/**
 * List the ESPN teams for a sport.
 * @param {string} sport key of espnSportPath
 * @returns {Promise<Array<{ id: *, abbreviation: * }>>}
 * @throws {Error} when the sport has no entry in espnSportPath
 */
async function listEspnTeams(sport) {
  const sub = espnSportPath[sport];
  if (!sub) throw new Error(`No ESPN path for sport ${sport}`);
  const data = await fetchJSON(`${ESPN_TEAMS_BASE}/${sub}/teams`);
  const groups = data?.sports?.[0]?.leagues?.[0]?.teams || [];
  return groups
    .map((t) => t?.team)
    .filter(Boolean)
    .map((t) => ({ id: t.id, abbreviation: t.abbreviation }));
}

/**
 * Fetch one team's roster from ESPN.
 * @param {string} sport key of espnSportPath
 * @param {string|number} teamId ESPN team id
 * @returns {Promise<Array<{ id: string, name: string }>>}
 */
async function fetchEspnRoster(sport, teamId) {
  const sub = espnSportPath[sport];
  const data = await fetchJSON(`${ESPN_TEAMS_BASE}/${sub}/teams/${teamId}/roster`);
  const athletes = [];
  // Two shapes show up in the wild: a flat athletes[] (most sports), or a
  // grouped athletes[].items[] (football). Handle both.
  const top = data?.athletes;
  if (Array.isArray(top)) {
    for (const entry of top) {
      if (entry?.id && entry?.fullName) {
        athletes.push({ id: String(entry.id), name: entry.fullName });
      } else if (Array.isArray(entry?.items)) {
        for (const a of entry.items) {
          if (a?.id && a?.fullName) athletes.push({ id: String(a.id), name: a.fullName });
        }
      }
    }
  }
  return athletes;
}

/**
 * Fetch the MLB Stats API player list for the current calendar year.
 * @returns {Promise<Array<{ mlbam_id: string, fullName: string, normalized: string }>>}
 */
async function fetchMlbAllPlayers() {
  const data = await fetchJSON(MLB_PEOPLE_BASE, {
    params: { season: new Date().getFullYear() },
  });
  const list = data?.people || [];
  return list.map((p) => ({
    mlbam_id: String(p.id),
    fullName: p.fullName,
    normalized: normalizeName(p.fullName),
  }));
}

/**
 * Index MLB players by normalized name, setting aside names shared by more
 * than one distinct mlbam_id so they are never matched by name alone.
 * @param {Array<{ mlbam_id: string, normalized: string }>} mlbList
 * @returns {{ byName: Map<string, string>, ambiguous: Set<string> }}
 */
function buildMlbNameIndex(mlbList) {
  const byName = new Map();
  const ambiguous = new Set();
  for (const { normalized, mlbam_id: id } of mlbList) {
    if (!normalized || ambiguous.has(normalized)) continue;
    const existing = byName.get(normalized);
    if (existing !== undefined && existing !== id) {
      // Two different people share this name: a name-only match would be a guess.
      byName.delete(normalized);
      ambiguous.add(normalized);
    } else {
      byName.set(normalized, id);
    }
  }
  return { byName, ambiguous };
}

/**
 * Collapse rows that share an espn_id, keeping the last occurrence.
 * @param {Array<{ espn_id: string }>} rows
 * @returns {{ unique: Array<object>, duplicates: number }}
 */
function dedupeByEspnId(rows) {
  const byId = new Map();
  for (const row of rows) byId.set(row.espn_id, row);
  return { unique: [...byId.values()], duplicates: rows.length - byId.size };
}

/**
 * Collect every rostered player for one sport from ESPN and upsert them into
 * player_id_map (unless dryRun is set). For MLB, also attaches mlbam_id to
 * players whose normalized name matches exactly one MLB Stats API player.
 *
 * Per-team roster failures and MLB matching failures are logged and skipped;
 * a failure listing the teams propagates to the caller.
 *
 * @param {string} sport sport key
 * @param {{ dryRun: boolean }} options
 * @returns {Promise<{ captured: number, skipped: number, errored: number,
 *   total?: number, dryRun?: boolean }>} `skipped` counts duplicate roster
 *   entries dropped before the upsert; `errored` counts rows in failed batches.
 */
async function processSport(sport, { dryRun }) {
  // Ensure the sport is one we have a pipeline config for; otherwise the
  // resolution route would never see this row.
  try {
    getSportConfig(sport);
  } catch {
    console.warn(`[skip] no SPORT_CONFIG for ${sport}`);
    return { captured: 0, skipped: 0, errored: 0 };
  }

  console.log(`[${sport}] listing ESPN teams…`);
  const teams = await listEspnTeams(sport);
  await sleep(ESPN_THROTTLE_MS);

  const rosterRows = [];
  for (const team of teams) {
    try {
      const roster = await fetchEspnRoster(sport, team.id);
      for (const p of roster) {
        rosterRows.push({
          display_name: p.name,
          normalized_name: normalizeName(p.name),
          espn_id: p.id,
          sport,
          team_abbr: team.abbreviation,
        });
      }
    } catch (err) {
      console.warn(`[${sport}] team ${team.abbreviation} roster failed: ${err.message}`);
    }
    await sleep(ESPN_THROTTLE_MS);
  }

  // The same espn_id appearing twice in one upsert statement makes Postgres
  // reject the whole batch ("ON CONFLICT DO UPDATE command cannot affect row a
  // second time"), so collapse duplicates up front.
  const { unique: allPlayers, duplicates: skipped } = dedupeByEspnId(rosterRows);
  console.log(`[${sport}] ESPN: ${allPlayers.length} players across ${teams.length} teams`);
  if (skipped > 0) {
    console.log(`[${sport}] dropped ${skipped} duplicate roster entries`);
  }

  // MLB-only: name-match to MLB Stats API for mlbam_id.
  if (sport === 'mlb') {
    try {
      const mlbList = await fetchMlbAllPlayers();
      const { byName, ambiguous } = buildMlbNameIndex(mlbList);
      let matched = 0;
      for (const p of allPlayers) {
        const id = byName.get(p.normalized_name);
        if (id) {
          p.mlbam_id = id;
          matched += 1;
        }
      }
      console.log(`[mlb] matched mlbam_id for ${matched}/${allPlayers.length} players`);
      if (ambiguous.size > 0) {
        console.log(`[mlb] ${ambiguous.size} names shared by multiple MLB players left unmatched`);
      }
    } catch (err) {
      console.warn(`[mlb] mlbam_id matching skipped: ${err.message}`);
    }
  }

  if (dryRun) {
    console.log(`[${sport}] dry-run — would upsert ${allPlayers.length} players`);
    return { captured: allPlayers.length, skipped, errored: 0, dryRun: true };
  }

  const supabase = getSupabaseServiceClient();
  let captured = 0;
  let errored = 0;
  // Upsert in batches of 100 to stay friendly with PostgREST request limits.
  const batchSize = 100;
  for (let i = 0; i < allPlayers.length; i += batchSize) {
    const updatedAt = new Date().toISOString();
    const batch = allPlayers
      .slice(i, i + batchSize)
      .map((p) => ({ ...p, updated_at: updatedAt }));
    const { error } = await supabase
      .from('player_id_map')
      .upsert(batch, { onConflict: 'espn_id' });
    if (error) {
      console.warn(`[${sport}] upsert batch ${i / batchSize} failed: ${error.message}`);
      errored += batch.length;
    } else {
      captured += batch.length;
    }
  }

  return { captured, skipped, errored, total: allPlayers.length };
}

/**
 * Ask a yes/no question on the terminal.
 * @param {string} promptText
 * @returns {Promise<boolean>} true when --yes was passed or the answer is
 *   "y"/"yes" (case-insensitive); false otherwise, including when stdin
 *   closes before an answer is given.
 */
async function confirm(promptText) {
  if (skipConfirm) return true;
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  try {
    const answer = await new Promise((resolve) => {
      // EOF on stdin never invokes the question callback; treat it as "no"
      // instead of leaving the promise pending.
      rl.once('close', () => resolve(''));
      rl.question(promptText, resolve);
    });
    return /^y(es)?$/i.test(answer.trim());
  } finally {
    rl.close();
  }
}

/**
 * Entry point: resolve the target sports, confirm unless this is a dry run,
 * process each sport in turn, and print a JSON summary keyed by sport.
 * @returns {Promise<void>}
 */
async function main() {
  const targets = explicitSport
    ? [explicitSport]
    : getActiveSports().map((s) => s.key);

  const target = process.env.SUPABASE_URL || '(unknown)';
  if (!dryRun) {
    const ok = await confirm(
      `This will upsert player IDs into ${target} for ${targets.join(', ')}. Continue? (y/n) `
    );
    if (!ok) {
      console.log('aborted');
      process.exit(0);
    }
  }

  const summary = {};
  for (const sport of targets) {
    try {
      summary[sport] = await processSport(sport, { dryRun });
    } catch (err) {
      // One sport failing outright must not stop the remaining sports.
      console.error(`[${sport}] fatal: ${err.message}`);
      summary[sport] = { error: err.message };
    }
  }

  console.log('\n=== summary ===');
  console.log(JSON.stringify(summary, null, 2));
}

main().catch((err) => {
  console.error('Unhandled:', err);
  process.exit(1);
});