219 lines
7.4 KiB
JavaScript
219 lines
7.4 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * Populate player_id_map with ESPN + (where applicable) MLB Stats API IDs.
 *
 *   node scripts/populate-player-ids.js            # all active sports, prompts for confirmation
 *   node scripts/populate-player-ids.js nba        # single sport
 *   node scripts/populate-player-ids.js --dry-run  # no DB writes
 *   node scripts/populate-player-ids.js --yes      # skip confirmation
 *
 * For each sport this script walks ESPN's team list, then each roster, and
 * upserts every player. MLB additionally name-matches against the MLB Stats
 * API for the mlbam_id (so Statcast lookups can find the player by ID, not
 * by name).
 *
 * Failure semantics: log + continue. A 4xx on one team doesn't kill the
 * batch. End-of-run summary prints captured / skipped / errored counts.
 */
|
|
|
|
// This file is a CLI entry point, not a library: refuse to load when it is
// require()'d from another module.
const isEntryPoint = require.main === module;
if (!isEntryPoint) {
  throw new Error('Run directly: node scripts/populate-player-ids.js');
}
|
|
|
|
const path = require('path');
|
|
require('dotenv').config({ path: path.join(__dirname, '..', '.env') });
|
|
|
|
const axios = require('axios');
|
|
const readline = require('readline');
|
|
const { getSupabaseServiceClient } = require('../src/utils/supabase');
|
|
const { getActiveSports, getSportConfig } = require('../src/config/sports');
|
|
|
|
// Everything after `node <script>` on the command line.
const args = process.argv.slice(2);

// Tokens starting with `--` are flags; the first bare word (if any) selects
// a single sport.
const flagSet = new Set(args.filter((arg) => arg.startsWith('--')));
const dryRun = flagSet.has('--dry-run');
const skipConfirm = flagSet.has('--yes');
const [explicitSport] = args.filter((arg) => !arg.startsWith('--'));
|
|
|
|
// Root of ESPN's site API; a per-sport path from espnSportPath is appended.
const ESPN_TEAMS_BASE = 'https://site.api.espn.com/apis/site/v2/sports';
// Pause (in milliseconds) inserted after each ESPN request.
const ESPN_THROTTLE_MS = 600;
// MLB Stats API endpoint used to list players for a season.
const MLB_PEOPLE_BASE = 'https://statsapi.mlb.com/api/v1/sports/1/players';

// Sport key -> ESPN URL path segment.
// The table has a null prototype so that inherited keys ("constructor",
// "toString", ...) look up as undefined instead of slipping past the
// "unknown sport" guard, and it is frozen so it cannot be mutated by accident.
const espnSportPath = Object.freeze(
  Object.assign(Object.create(null), {
    nba: 'basketball/nba',
    wnba: 'basketball/wnba',
    ncaab: 'basketball/mens-college-basketball',
    mlb: 'baseball/mlb',
    nfl: 'football/nfl',
    ncaafb: 'football/college-football',
    nhl: 'hockey/nhl',
  }),
);
|
|
|
|
/**
 * Return a promise that resolves (with no value) once `ms` milliseconds
 * have elapsed.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
/**
 * Reduce a player name to a lowercase ASCII key suitable for comparison.
 *
 * Steps, in order: decompose accents (NFD) and drop the combining marks,
 * lowercase, remove generational suffixes (jr, sr, ii, iii, iv, v), turn
 * any remaining punctuation into spaces, collapse runs of whitespace, trim.
 *
 * @param {string} name - Display name; falsy values yield ''.
 * @returns {string} Normalized key, possibly empty.
 */
function normalizeName(name) {
  if (!name) return '';
  // String() keeps a stray non-string value (e.g. a number) from throwing
  // on .normalize().
  return String(name)
    .normalize('NFD')
    // U+0300–U+036F is the Combining Diacritical Marks block. It is spelled
    // with escapes so the range stays visible and survives re-encoding.
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    .replace(/\b(jr|sr|ii|iii|iv|v)\.?\b/g, '') // suffixes
    .replace(/[^a-z0-9\s]/g, ' ') // punctuation
    .replace(/\s+/g, ' ') // collapse spaces
    .trim();
}
|
|
|
|
/**
 * GET a URL through axios with a 15-second timeout and return the response
 * body.
 *
 * @param {string} url - Absolute URL to request.
 * @param {{ params?: object }} [options] - Optional query-string parameters.
 * @returns {Promise<*>} The `data` field of the axios response.
 */
async function fetchJSON(url, { params } = {}) {
  const requestConfig = { params, timeout: 15_000 };
  const { data } = await axios.get(url, requestConfig);
  return data;
}
|
|
|
|
/**
 * Fetch ESPN's team list for a sport.
 *
 * @param {string} sport - Key into espnSportPath (e.g. 'nba').
 * @returns {Promise<Array<{ id: *, abbreviation: * }>>} One entry per team
 *   wrapper in the response that carries a `team` object.
 * @throws {Error} When the sport has no ESPN path configured.
 */
async function listEspnTeams(sport) {
  const sub = espnSportPath[sport];
  if (!sub) {
    throw new Error(`No ESPN path for sport ${sport}`);
  }

  const payload = await fetchJSON(`${ESPN_TEAMS_BASE}/${sub}/teams`);
  // Teams are read from sports[0].leagues[0].teams, each wrapped as { team }.
  const wrappers = payload?.sports?.[0]?.leagues?.[0]?.teams || [];

  return wrappers.flatMap((wrapper) => {
    const team = wrapper?.team;
    return team ? [{ id: team.id, abbreviation: team.abbreviation }] : [];
  });
}
|
|
|
|
/**
 * Flatten an ESPN roster payload into `{ id, name }` pairs.
 *
 * Two shapes show up in the wild: a flat athletes[] (most sports), or a
 * grouped athletes[].items[] (football). Both are handled; entries missing
 * an id or fullName are ignored.
 *
 * @param {*} data - Parsed roster response.
 * @returns {Array<{ id: string, name: string }>}
 */
function extractRosterAthletes(data) {
  const top = data?.athletes;
  if (!Array.isArray(top)) return [];

  const athletes = [];
  // Returns true when the candidate was a complete athlete record.
  const pushIfComplete = (candidate) => {
    if (candidate?.id && candidate?.fullName) {
      athletes.push({ id: String(candidate.id), name: candidate.fullName });
      return true;
    }
    return false;
  };

  for (const entry of top) {
    if (pushIfComplete(entry)) continue;
    if (Array.isArray(entry?.items)) {
      for (const item of entry.items) pushIfComplete(item);
    }
  }
  return athletes;
}

/**
 * Fetch one team's roster from ESPN.
 *
 * @param {string} sport - Key into espnSportPath (e.g. 'nfl').
 * @param {string|number} teamId - ESPN team id.
 * @returns {Promise<Array<{ id: string, name: string }>>}
 * @throws {Error} When the sport has no ESPN path configured (same check as
 *   listEspnTeams, so a bad key never reaches the network as ".../undefined/...").
 */
async function fetchEspnRoster(sport, teamId) {
  const sub = espnSportPath[sport];
  if (!sub) throw new Error(`No ESPN path for sport ${sport}`);
  const data = await fetchJSON(`${ESPN_TEAMS_BASE}/${sub}/teams/${teamId}/roster`);
  return extractRosterAthletes(data);
}
|
|
|
|
/**
 * Fetch the MLB Stats API player list for the current calendar year.
 *
 * @returns {Promise<Array<{ mlbam_id: string, fullName: *, normalized: string }>>}
 *   One record per entry in the response's `people` array (empty when the
 *   field is missing).
 */
async function fetchMlbAllPlayers() {
  const season = new Date().getFullYear();
  const payload = await fetchJSON(MLB_PEOPLE_BASE, { params: { season } });
  const people = payload?.people || [];
  return people.map(({ id, fullName }) => ({
    mlbam_id: String(id),
    fullName,
    normalized: normalizeName(fullName),
  }));
}
|
|
|
|
/**
 * Collapse rows that share an espn_id, keeping the last occurrence.
 *
 * A single upsert statement that contains the same conflict key twice is
 * rejected by Postgres, which would fail the whole batch.
 *
 * @param {Array<object>} players - Rows carrying an `espn_id`.
 * @returns {Array<object>} Rows with unique `espn_id`.
 */
function dedupeByEspnId(players) {
  const byId = new Map();
  for (const player of players) byId.set(player.espn_id, player);
  return [...byId.values()];
}

/**
 * Set `mlbam_id` on each player whose normalized name identifies exactly
 * one MLB Stats API player. Names shared by several people on either side
 * are left unmatched rather than guessed. Mutates the player rows in place.
 *
 * @param {Array<object>} players - Rows carrying `normalized_name`.
 * @param {Array<{ normalized: string, mlbam_id: string }>} mlbList
 * @returns {{ matched: number, ambiguous: number }}
 */
function attachMlbamIds(players, mlbList) {
  const idByName = new Map();
  const ambiguousNames = new Set();
  for (const { normalized, mlbam_id: mlbamId } of mlbList) {
    if (!normalized) continue;
    const known = idByName.get(normalized);
    if (known !== undefined && known !== mlbamId) ambiguousNames.add(normalized);
    else idByName.set(normalized, mlbamId);
  }

  // Two distinct ESPN players with the same key cannot both be the one MLB
  // player of that name.
  const espnNameCounts = new Map();
  for (const { normalized_name: key } of players) {
    espnNameCounts.set(key, (espnNameCounts.get(key) ?? 0) + 1);
  }
  for (const [key, count] of espnNameCounts) {
    if (count > 1) ambiguousNames.add(key);
  }

  let matched = 0;
  let ambiguous = 0;
  for (const player of players) {
    const key = player.normalized_name;
    if (!idByName.has(key)) continue;
    if (ambiguousNames.has(key)) {
      ambiguous += 1;
      continue;
    }
    player.mlbam_id = idByName.get(key);
    matched += 1;
  }
  return { matched, ambiguous };
}

/**
 * Collect every rostered player for one sport from ESPN and upsert them
 * into `player_id_map` (or just report the count in dry-run mode).
 *
 * Failures are logged and skipped: a roster request that throws only loses
 * that team, and a failed upsert batch only loses that batch.
 *
 * @param {string} sport - Sport key (e.g. 'mlb').
 * @param {{ dryRun: boolean }} options - When `dryRun` is true nothing is
 *   written to the database.
 * @returns {Promise<object>} Counts for the run: `captured`, `skipped`
 *   (duplicate roster rows dropped) and `errored`; a completed run also
 *   reports `total` and `teamsFailed`, a dry run sets `dryRun: true`.
 * @throws Whatever listEspnTeams throws; the caller handles that per sport.
 */
async function processSport(sport, { dryRun }) {
  // Ensure the sport is one we have a pipeline config for; otherwise the
  // resolution route would never see this row.
  try {
    getSportConfig(sport);
  } catch {
    console.warn(`[skip] no SPORT_CONFIG for ${sport}`);
    return { captured: 0, skipped: 0, errored: 0 };
  }

  console.log(`[${sport}] listing ESPN teams…`);
  const teams = await listEspnTeams(sport);
  await sleep(ESPN_THROTTLE_MS);

  const rosterRows = [];
  let teamsFailed = 0;
  for (const team of teams) {
    try {
      const roster = await fetchEspnRoster(sport, team.id);
      for (const p of roster) {
        rosterRows.push({
          display_name: p.name,
          normalized_name: normalizeName(p.name),
          espn_id: p.id,
          sport,
          team_abbr: team.abbreviation,
        });
      }
    } catch (err) {
      teamsFailed += 1;
      console.warn(`[${sport}] team ${team.abbreviation} roster failed: ${err.message}`);
    }
    // Throttle after every request, successful or not.
    await sleep(ESPN_THROTTLE_MS);
  }
  console.log(`[${sport}] ESPN: ${rosterRows.length} players across ${teams.length} teams`);

  const allPlayers = dedupeByEspnId(rosterRows);
  const skipped = rosterRows.length - allPlayers.length;
  if (skipped > 0) {
    console.warn(`[${sport}] dropped ${skipped} duplicate espn_id rows`);
  }

  // MLB-only: name-match to MLB Stats API for mlbam_id.
  if (sport === 'mlb') {
    try {
      const mlbList = await fetchMlbAllPlayers();
      const { matched, ambiguous } = attachMlbamIds(allPlayers, mlbList);
      console.log(`[mlb] matched mlbam_id for ${matched}/${allPlayers.length} players`);
      if (ambiguous > 0) {
        console.warn(`[mlb] left ${ambiguous} players unmatched: name is not unique`);
      }
    } catch (err) {
      console.warn(`[mlb] mlbam_id matching skipped: ${err.message}`);
    }
  }

  if (dryRun) {
    console.log(`[${sport}] dry-run — would upsert ${allPlayers.length} players`);
    return { captured: allPlayers.length, skipped, errored: 0, dryRun: true };
  }

  const supabase = getSupabaseServiceClient();
  let captured = 0;
  let errored = 0;
  // Upsert in batches of 100 to stay friendly with PostgREST request limits.
  const batchSize = 100;
  for (let i = 0; i < allPlayers.length; i += batchSize) {
    const batch = allPlayers.slice(i, i + batchSize).map((p) => ({
      ...p,
      updated_at: new Date().toISOString(),
    }));
    const { error } = await supabase
      .from('player_id_map')
      .upsert(batch, { onConflict: 'espn_id' });
    if (error) {
      console.warn(`[${sport}] upsert batch ${i / batchSize} failed: ${error.message}`);
      errored += batch.length;
    } else {
      captured += batch.length;
    }
  }
  return { captured, skipped, errored, total: allPlayers.length, teamsFailed };
}
|
|
|
|
/**
 * Ask a yes/no question on the terminal.
 *
 * @param {string} promptText - Text shown to the user.
 * @returns {Promise<boolean>} true when `--yes` was passed or the answer is
 *   "y"/"yes" (case-insensitive); false for anything else, including the
 *   input ending before an answer is given.
 */
async function confirm(promptText) {
  if (skipConfirm) return true;
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  try {
    const answer = await new Promise((resolve) => {
      // The question callback never fires if the interface closes first
      // (e.g. stdin at EOF); treat that as an empty answer instead of
      // leaving this promise pending forever.
      rl.once('close', () => resolve(''));
      rl.question(promptText, resolve);
    });
    return /^y(es)?$/i.test(answer.trim());
  } finally {
    rl.close();
  }
}
|
|
|
|
/**
 * Script entry point: pick the sports to run, ask for confirmation unless
 * this is a dry run, process each sport in turn, then print a JSON summary.
 *
 * A sport whose processing throws is logged and recorded as `{ error }` in
 * the summary; the remaining sports still run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let sportsToRun;
  if (explicitSport) {
    sportsToRun = [explicitSport];
  } else {
    sportsToRun = getActiveSports().map(({ key }) => key);
  }
  const destination = process.env.SUPABASE_URL || '(unknown)';

  const needsConfirmation = !dryRun;
  if (needsConfirmation) {
    const question =
      `This will upsert player IDs into ${destination} for ${sportsToRun.join(', ')}. Continue? (y/n) `;
    const approved = await confirm(question);
    if (!approved) {
      console.log('aborted');
      process.exit(0);
    }
  }

  const report = {};
  for (const sport of sportsToRun) {
    try {
      report[sport] = await processSport(sport, { dryRun });
    } catch (failure) {
      console.error(`[${sport}] fatal: ${failure.message}`);
      report[sport] = { error: failure.message };
    }
  }

  console.log('\n=== summary ===');
  console.log(JSON.stringify(report, null, 2));
}
|
|
|
|
// Last-resort handler: anything main() lets escape is printed and the
// process ends with exit code 1.
function reportUnhandledAndExit(fatal) {
  console.error('Unhandled:', fatal);
  process.exit(1);
}

main().catch(reportUnhandledAndExit);
|