Files
vyndr/scripts/populate-player-ids.js

219 lines
7.4 KiB
JavaScript

#!/usr/bin/env node
/**
* Populate player_id_map with ESPN + (where applicable) MLB Stats API IDs.
*
* node scripts/populate-player-ids.js # all active sports, prompts
* node scripts/populate-player-ids.js nba # single sport
* node scripts/populate-player-ids.js --dry-run # no DB writes
* node scripts/populate-player-ids.js --yes # skip confirmation
*
* For each sport this script walks ESPN's team list, then each roster, and
* upserts every player. MLB additionally name-matches to MLB Stats API for
* the mlbam_id (so Statcast lookups can find the player by ID, not name).
*
* Failure semantics: log + continue. A 4xx on one team doesn't kill the
* batch. End-of-run summary prints captured / skipped / errored counts.
*/
// Guard: this file is a CLI entry point, not a library. It calls main() at
// load time (bottom of the file), so an accidental require() from another
// module is rejected here, before any dependency is loaded or any work starts.
if (require.main !== module) {
throw new Error('Run directly: node scripts/populate-player-ids.js');
}
const path = require('path');
require('dotenv').config({ path: path.join(__dirname, '..', '.env') });
const axios = require('axios');
const readline = require('readline');
const { getSupabaseServiceClient } = require('../src/utils/supabase');
const { getActiveSports, getSportConfig } = require('../src/config/sports');
// Command-line arguments with the node binary and script path removed.
const args = process.argv.slice(2);
// --dry-run: collect players but skip the confirmation prompt and all DB writes.
const dryRun = args.includes('--dry-run');
// --yes: answer the confirmation prompt automatically.
const skipConfirm = args.includes('--yes');
// First argument that is not a --flag; when present it is the only sport processed.
const explicitSport = args.find((a) => !a.startsWith('--'));
// Base URL for ESPN's per-sport team and roster endpoints.
const ESPN_TEAMS_BASE = 'https://site.api.espn.com/apis/site/v2/sports';
// Pause (in milliseconds) inserted after each ESPN request.
const ESPN_THROTTLE_MS = 600;
// MLB Stats API player list endpoint; queried with a `season` parameter.
const MLB_PEOPLE_BASE = 'https://statsapi.mlb.com/api/v1/sports/1/players';
// Sport key -> ESPN URL path segment. A sport missing from this table cannot
// be processed: listEspnTeams() throws for it.
const espnSportPath = {
nba: 'basketball/nba',
wnba: 'basketball/wnba',
ncaab: 'basketball/mens-college-basketball',
mlb: 'baseball/mlb',
nfl: 'football/nfl',
ncaafb: 'football/college-football',
nhl: 'hockey/nhl',
};
/**
 * Pause for a while without blocking the event loop.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Reduce a player's display name to a comparison key: accents removed,
 * lower-cased, generational suffixes (jr, sr, ii, iii, iv, v) dropped,
 * punctuation turned into spaces, and whitespace collapsed.
 *
 * @param {*} name - Display name. Falsy values yield ''. Other non-string
 *   values are converted with String() instead of throwing.
 * @returns {string} The normalized key, possibly ''.
 */
function normalizeName(name) {
  if (!name) return '';
  const text = typeof name === 'string' ? name : String(name);
  return text
    // Decompose accented letters into base letter + combining mark ...
    .normalize('NFD')
    // ... then drop the combining marks (U+0300-U+036F). Written as escapes so
    // the range stays visible and survives editors and re-encoding.
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    // Generational suffixes; any trailing '.' is removed by the next step.
    .replace(/\b(jr|sr|ii|iii|iv|v)\.?\b/g, '')
    // Everything that is not a letter, digit or whitespace becomes a space.
    .replace(/[^a-z0-9\s]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * GET a URL through axios with a 15 second timeout and return the response body.
 *
 * @param {string} url - Address to request.
 * @param {{params?: object}} [options] - Optional query-string parameters.
 * @returns {Promise<*>} The `data` field of the axios response.
 */
async function fetchJSON(url, { params } = {}) {
  const requestOptions = { params, timeout: 15_000 };
  const { data } = await axios.get(url, requestOptions);
  return data;
}
/**
 * Fetch the ESPN team list for one sport.
 *
 * @param {string} sport - Key into `espnSportPath`.
 * @returns {Promise<Array<{id: *, abbreviation: *}>>} One entry per team found
 *   in the response; empty when the response has no team list.
 * @throws {Error} When `sport` has no entry in `espnSportPath`.
 */
async function listEspnTeams(sport) {
  const leaguePath = espnSportPath[sport];
  if (!leaguePath) {
    throw new Error(`No ESPN path for sport ${sport}`);
  }

  const payload = await fetchJSON(`${ESPN_TEAMS_BASE}/${leaguePath}/teams`);
  // Teams arrive wrapped as sports[0].leagues[0].teams[].team.
  const wrappers = payload?.sports?.[0]?.leagues?.[0]?.teams || [];

  return wrappers.flatMap((wrapper) => {
    const team = wrapper?.team;
    return team ? [{ id: team.id, abbreviation: team.abbreviation }] : [];
  });
}
/**
 * Fetch one team's roster from ESPN and flatten it to `{ id, name }` pairs.
 *
 * The `athletes` array comes in two shapes: a flat list of athletes, or a list
 * of groups that each carry their athletes under `items`. Both are handled;
 * entries lacking an id or a fullName are ignored.
 *
 * @param {string} sport - Key into `espnSportPath`.
 * @param {*} teamId - ESPN team id, interpolated into the URL.
 * @returns {Promise<Array<{id: string, name: *}>>} Athletes with the id as a string.
 */
async function fetchEspnRoster(sport, teamId) {
  const leaguePath = espnSportPath[sport];
  const payload = await fetchJSON(`${ESPN_TEAMS_BASE}/${leaguePath}/teams/${teamId}/roster`);

  const entries = payload?.athletes;
  if (!Array.isArray(entries)) return [];

  const isAthlete = (candidate) => Boolean(candidate?.id && candidate?.fullName);
  const toAthlete = (candidate) => ({ id: String(candidate.id), name: candidate.fullName });

  return entries.flatMap((entry) => {
    // Flat shape: the entry itself is an athlete.
    if (isAthlete(entry)) return [toAthlete(entry)];
    // Grouped shape: the athletes live in entry.items.
    if (Array.isArray(entry?.items)) return entry.items.filter(isAthlete).map(toAthlete);
    return [];
  });
}
/**
 * Fetch the MLB Stats API player list for the current calendar year.
 *
 * @returns {Promise<Array<{mlbam_id: string, fullName: *, normalized: string}>>}
 *   One entry per person in the response, with the id as a string and the name
 *   run through normalizeName(); empty when the response has no `people`.
 */
async function fetchMlbAllPlayers() {
  const season = new Date().getFullYear();
  const payload = await fetchJSON(MLB_PEOPLE_BASE, { params: { season } });
  const people = payload?.people || [];

  return people.map(({ id, fullName }) => ({
    mlbam_id: String(id),
    fullName,
    normalized: normalizeName(fullName),
  }));
}
/**
 * Attach `mlbam_id` to ESPN players whose normalized name identifies exactly
 * one MLB Stats API player.
 *
 * A name is treated as ambiguous, and left unmatched, when the MLB list has
 * more than one distinct id for it or when more than one ESPN player carries
 * it. Guessing in those cases would silently store the wrong id.
 * Matched entries of `players` are mutated in place.
 *
 * @param {Array<object>} players - Rows with a `normalized_name` field.
 * @param {Array<{mlbam_id: string, normalized: string}>} mlbList - MLB players.
 * @returns {{matched: number, ambiguous: number}} How many players received an
 *   id and how many were skipped as ambiguous.
 */
function attachMlbamIds(players, mlbList) {
  const idsByName = new Map();
  for (const { normalized, mlbam_id } of mlbList) {
    if (!normalized) continue; // an empty key would match every nameless row
    const ids = idsByName.get(normalized) ?? new Set();
    ids.add(mlbam_id);
    idsByName.set(normalized, ids);
  }

  const espnNameCounts = new Map();
  for (const { normalized_name } of players) {
    espnNameCounts.set(normalized_name, (espnNameCounts.get(normalized_name) ?? 0) + 1);
  }

  let matched = 0;
  let ambiguous = 0;
  for (const player of players) {
    const ids = idsByName.get(player.normalized_name);
    if (!ids) continue;
    if (ids.size > 1 || espnNameCounts.get(player.normalized_name) > 1) {
      ambiguous += 1;
      continue;
    }
    [player.mlbam_id] = ids;
    matched += 1;
  }
  return { matched, ambiguous };
}

/**
 * Collect every ESPN roster player for one sport and upsert them into the
 * `player_id_map` table (conflict target: `espn_id`).
 *
 * Steps: check the sport has a config, list ESPN teams, fetch each roster
 * (a failing roster is logged and skipped), de-duplicate by `espn_id`, for MLB
 * attach `mlbam_id` by name, then upsert in batches of 100.
 *
 * @param {string} sport - Sport key.
 * @param {{dryRun: boolean}} options - When `dryRun` is true nothing is written.
 * @returns {Promise<{captured: number, skipped: number, errored: number,
 *   total: number, failedTeams: number, dryRun?: boolean}>}
 *   captured: rows upserted (or that would be, in a dry run);
 *   skipped: duplicate `espn_id` roster entries that were collapsed;
 *   errored: rows in batches the database rejected;
 *   total: unique players collected;
 *   failedTeams: teams whose roster request failed.
 * @throws Whatever listEspnTeams() throws; per-team and per-batch failures are
 *   logged and counted instead.
 */
async function processSport(sport, { dryRun }) {
  // Ensure the sport is one we have a pipeline config for; otherwise the
  // resolution route would never see this row.
  try {
    getSportConfig(sport);
  } catch {
    console.warn(`[skip] no SPORT_CONFIG for ${sport}`);
    return { captured: 0, skipped: 0, errored: 0, total: 0, failedTeams: 0 };
  }

  console.log(`[${sport}] listing ESPN teams…`);
  const teams = await listEspnTeams(sport);
  await sleep(ESPN_THROTTLE_MS);

  // Keyed by espn_id: one upsert statement must not contain the same conflict
  // key twice, or Postgres rejects the whole batch. When a player shows up on
  // more than one roster the later roster wins.
  const playersByEspnId = new Map();
  let skipped = 0;
  let failedTeams = 0;
  for (const team of teams) {
    try {
      const roster = await fetchEspnRoster(sport, team.id);
      for (const p of roster) {
        if (playersByEspnId.has(p.id)) skipped += 1;
        playersByEspnId.set(p.id, {
          display_name: p.name,
          normalized_name: normalizeName(p.name),
          espn_id: p.id,
          sport,
          team_abbr: team.abbreviation,
        });
      }
    } catch (err) {
      failedTeams += 1;
      console.warn(`[${sport}] team ${team.abbreviation} roster failed: ${err.message}`);
    }
    await sleep(ESPN_THROTTLE_MS);
  }

  const allPlayers = [...playersByEspnId.values()];
  console.log(`[${sport}] ESPN: ${allPlayers.length} players across ${teams.length} teams`);
  if (skipped > 0) {
    console.log(`[${sport}] collapsed ${skipped} duplicate espn_id entries`);
  }

  // MLB-only: name-match to MLB Stats API for mlbam_id. Best effort: a failure
  // here leaves the rows without mlbam_id but does not stop the upsert.
  if (sport === 'mlb') {
    try {
      const mlbList = await fetchMlbAllPlayers();
      const { matched, ambiguous } = attachMlbamIds(allPlayers, mlbList);
      console.log(`[mlb] matched mlbam_id for ${matched}/${allPlayers.length} players`);
      if (ambiguous > 0) {
        console.warn(`[mlb] left ${ambiguous} players unmatched: name is not unique`);
      }
    } catch (err) {
      console.warn(`[mlb] mlbam_id matching skipped: ${err.message}`);
    }
  }

  const total = allPlayers.length;
  if (dryRun) {
    console.log(`[${sport}] dry-run — would upsert ${total} players`);
    return { captured: total, skipped, errored: 0, total, failedTeams, dryRun: true };
  }

  const supabase = getSupabaseServiceClient();
  let captured = 0;
  let errored = 0;
  // Upsert in batches of 100 to stay friendly with PostgREST request limits.
  const batchSize = 100;
  for (let i = 0; i < total; i += batchSize) {
    const updatedAt = new Date().toISOString();
    const batch = allPlayers.slice(i, i + batchSize).map((p) => ({
      ...p,
      updated_at: updatedAt,
    }));
    const { error } = await supabase
      .from('player_id_map')
      .upsert(batch, { onConflict: 'espn_id' });
    if (error) {
      console.warn(`[${sport}] upsert batch ${i / batchSize} failed: ${error.message}`);
      errored += batch.length;
    } else {
      captured += batch.length;
    }
  }
  return { captured, skipped, errored, total, failedTeams };
}
/**
 * Ask a yes/no question on the terminal.
 *
 * Returns true immediately when the script was started with --yes. Otherwise
 * the prompt is printed and one line is read from stdin; only "y" or "yes"
 * (any case, surrounding whitespace ignored) counts as consent.
 *
 * If stdin ends before an answer arrives (closed pipe, non-interactive run)
 * the question callback would never fire and the script would stop silently;
 * that case is treated as a "no" instead.
 *
 * @param {string} promptText - Text shown to the user.
 * @returns {Promise<boolean>} Whether the user agreed.
 */
async function confirm(promptText) {
  if (skipConfirm) return true;
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  try {
    const answer = await new Promise((resolve) => {
      // 'close' fires when input ends; settling here prevents a hang. After a
      // real answer this second resolve() is a no-op.
      rl.once('close', () => resolve(''));
      rl.question(promptText, resolve);
    });
    return /^y(es)?$/i.test(answer.trim());
  } finally {
    rl.close();
  }
}
/**
 * Script driver: pick the sports to process, ask for confirmation unless this
 * is a dry run, process each sport in turn, and print a JSON summary.
 *
 * A sport whose processing throws is logged and recorded in the summary as
 * `{ error }`; the remaining sports still run. Declining the confirmation
 * prints "aborted" and exits with status 0.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const sports = explicitSport
    ? [explicitSport]
    : getActiveSports().map(({ key }) => key);

  if (!dryRun) {
    const destination = process.env.SUPABASE_URL || '(unknown)';
    const proceed = await confirm(
      `This will upsert player IDs into ${destination} for ${sports.join(', ')}. Continue? (y/n) `
    );
    if (!proceed) {
      console.log('aborted');
      process.exit(0);
    }
  }

  const report = {};
  for (const sport of sports) {
    let outcome;
    try {
      outcome = await processSport(sport, { dryRun });
    } catch (err) {
      console.error(`[${sport}] fatal: ${err.message}`);
      outcome = { error: err.message };
    }
    report[sport] = outcome;
  }

  console.log('\n=== summary ===');
  console.log(JSON.stringify(report, null, 2));
}
// Entry point: run main() and, should it reject, log the error and exit with
// status 1.
(async () => {
  try {
    await main();
  } catch (err) {
    console.error('Unhandled:', err);
    process.exit(1);
  }
})();