Session 7j: Soccer intelligence - 9 leagues, 11 signals, 6 traps, poller, prefetch, 131 new tests (1173 total)
This commit is contained in:
@@ -0,0 +1,246 @@
|
||||
#!/usr/bin/env node
|
||||
/**
 * Daily soccer intelligence prefetch — run once per day.
 *
 *   cron: 0 5 * * *   (5am UTC, ~midnight ET — before US fixtures)
 *   call: node scripts/soccer-data-prefetch.js [--leagues=WC,PL] [--dry-run]
 *   env:  SOCCER_LEAGUES=WC,PL is used when no --leagues flag is given
 *
 * Why: football-data.org caps at 10 req/min and ~10/day for some
 * endpoints, so we can't read these on the user request path. This script
 * batches the reads, transforms them into the per-player / per-team
 * aggregates the feature extractor consumes, and persists them to
 * Redis with conservative TTLs.
 *
 * Writes:
 *   soccer:{league}:standings           — raw standings from API
 *   soccer:{league}:scorers             — top-scorers list (projected)
 *   soccer:player:{normalizedName}      — per-player aggregate (per-90 rates)
 *   soccer:teamdefense:{league}:{team}  — team defensive aggregate + normalized rank
 *
 * Does NOT write next-match / last-fixture pointers — those are the
 * job of the poller (poller/soccer.js), which runs more frequently
 * since fixture state changes faster.
 *
 * xG data (`xg_per_90`, `xg_delta`) is left null on Day 1 — sourcing
 * it requires a soccerdata-Python bridge, which is a follow-up. The
 * downstream feature extractor handles null xG gracefully.
 *
 * No DB writes. Graceful exit (code 0) when API keys are missing — the
 * script logs "skipped" and the feature extractor continues with the
 * static-data-only path.
 */
|
||||
|
||||
const fbd = require('../src/services/adapters/footballDataAdapter');
|
||||
const { cacheSet } = require('../src/utils/redis');
|
||||
const { normalizeName } = require('../src/utils/normalize');
|
||||
|
||||
// Redis TTLs, in seconds, for each key family this script writes.
const PLAYER_TTL_SEC = 24 * 60 * 60; // per-player aggregates: 24h
const STANDINGS_TTL_SEC = 12 * 60 * 60; // raw league standings: 12h
const SCORERS_TTL_SEC = 6 * 60 * 60; // top-scorers list: 6h
const DEFENSE_TTL_SEC = 12 * 60 * 60; // team defensive aggregates: 12h
|
||||
|
||||
/**
 * Parse the command-line flags understood by the prefetch script.
 *
 * Recognized flags:
 *   --leagues=WC,PL   comma-separated league codes (trimmed, upper-cased,
 *                     empty entries dropped)
 *   --dry-run         sets `dryRun` to true
 *
 * When `argv` carries no `--leagues=` flag, the SOCCER_LEAGUES environment
 * variable (same comma-separated format) is used if it is non-empty;
 * otherwise the default of ['WC'] applies.
 *
 * @param {string[]} argv - Full argv vector; the first two entries
 *   (interpreter and script path) are skipped.
 * @returns {{ leagues: string[], dryRun: boolean }}
 */
function parseArgs(argv) {
  const LEAGUES_FLAG = '--leagues=';
  const toLeagueList = (raw) => raw
    .split(',')
    .map((s) => s.trim().toUpperCase())
    .filter(Boolean);

  const args = { leagues: ['WC'], dryRun: false };
  let leaguesFromCli = false;

  for (const a of argv.slice(2)) {
    if (a.startsWith(LEAGUES_FLAG)) {
      args.leagues = toLeagueList(a.slice(LEAGUES_FLAG.length));
      leaguesFromCli = true;
    } else if (a === '--dry-run') {
      args.dryRun = true;
    }
  }

  // The env override only applies when the *given* argv had no --leagues
  // flag. Decide from `argv`, not the global process.argv, so callers that
  // pass their own vector get the CLI-over-env precedence they expect.
  if (!leaguesFromCli) {
    const env = process.env.SOCCER_LEAGUES;
    if (env) args.leagues = toLeagueList(env);
  }

  return args;
}
|
||||
|
||||
/**
 * Project a single team's standings row into the defensive aggregate
 * the feature extractor reads.
 *
 * `defensive_rank_norm` is on a 0..1 scale (0 = best defense, 1 = worst)
 * so it slots into engine1's opp_rank_stat. Teams with an identical
 * goals-conceded rate share the best rank among them.
 *
 * @param {object} standingsRow - Row for the team being aggregated. Reads
 *   `playedGames` (or `played`), `goalsAgainst` and optional `cleanSheets`.
 * @param {object[]} allRows - Every row of the comparison table (normally
 *   including `standingsRow` itself).
 * @returns {?{goals_conceded_per_game: number, clean_sheet_rate: ?number,
 *   defensive_rank: number, defensive_rank_norm: number, played_games: number}}
 *   null when the row has no played games or no goals-against figure.
 */
function aggregateTeamDefense(standingsRow, allRows) {
  const playedGames = standingsRow.playedGames || standingsRow.played || 0;
  const goalsAgainst = standingsRow.goalsAgainst ?? null;
  if (!playedGames || goalsAgainst == null) return null;

  const goalsConcededPerGame = goalsAgainst / playedGames;

  // Normalize against the rest of the table — defensive_rank_norm = the
  // team's goals-conceded percentile (0 best, 1 worst). Rows that have no
  // games or no goals-against figure are left out: they produce no
  // aggregate of their own, so they must not occupy a rank slot either
  // (counting them as "0 conceded" would push every real team down).
  const allRates = allRows
    .map((r) => {
      const pg = r.playedGames || r.played || 0;
      if (!pg || r.goalsAgainst == null) return null;
      return r.goalsAgainst / pg;
    })
    .filter((v) => Number.isFinite(v))
    .sort((a, b) => a - b);

  let rank = allRates.findIndex((v) => v >= goalsConcededPerGame);
  // Not found means this team concedes more than everyone in the table:
  // place it last. Clamp at 0 so an empty table cannot yield rank -1.
  if (rank === -1) rank = Math.max(allRates.length - 1, 0);
  const rankNorm = allRates.length > 1 ? rank / (allRates.length - 1) : 0;

  // Clean sheets (not on the football-data row in the free tier — null is OK).
  const cleanSheets = standingsRow.cleanSheets ?? null;
  const cleanSheetRate = cleanSheets != null && playedGames > 0
    ? cleanSheets / playedGames
    : null;

  return {
    goals_conceded_per_game: Math.round(goalsConcededPerGame * 1000) / 1000,
    clean_sheet_rate: cleanSheetRate,
    defensive_rank: rank + 1, // 1-indexed for human reasoning
    defensive_rank_norm: rankNorm, // 0..1 for engine1
    played_games: playedGames,
  };
}
|
||||
|
||||
/**
 * Turn one top-scorers row into the per-player aggregate stored in Redis.
 *
 * Rates are per 90 minutes when the row carries a positive minutes figure;
 * otherwise they fall back to a per-match rate (count / playedMatches), or
 * null when the player has no recorded matches either.
 *
 * @param {object} scorerRow - Reads `team`, `position`, `nationality`,
 *   `goals`, `assists`, `playedMatches` and `minutesPlayed`.
 * @returns {object} Player profile; form, start-rate and xG fields are
 *   always null here.
 */
function aggregatePlayerFromScorer(scorerRow) {
  const toThreeDecimals = (value) => Math.round(value * 1000) / 1000;
  const toCount = (raw) => Number(raw) || 0;

  // A missing minutes field must stay null: Number(null) is 0, which is
  // finite and would masquerade as "played 0 minutes".
  const rawMinutes = scorerRow.minutesPlayed;
  const minutes = rawMinutes == null ? null : Number(rawMinutes);
  const minutesKnown = Number.isFinite(minutes);

  const goals = toCount(scorerRow.goals);
  const assists = toCount(scorerRow.assists);
  const played = toCount(scorerRow.playedMatches);

  // Per-90 when minutes are usable; the free tier sometimes omits them,
  // in which case the per-match rate stands in.
  const rateFor = (count) => {
    if (minutesKnown && minutes > 0) {
      return toThreeDecimals(count / (minutes / 90));
    }
    if (played > 0) {
      return toThreeDecimals(count / played);
    }
    return null;
  };

  const goalsPer90 = rateFor(goals);
  const assistsPer90 = rateFor(assists);

  let minutesPerGame = null;
  if (minutesKnown && played > 0) {
    minutesPerGame = Math.round(minutes / played);
  }

  return {
    team: scorerRow.team,
    position: scorerRow.position,
    nationality: scorerRow.nationality,
    goals,
    assists,
    played,
    minutes: minutesKnown ? minutes : null,
    goals_per_90: goalsPer90,
    assists_per_90: assistsPer90,
    minutes_per_game: minutesPerGame,
    // Day 1 — no rolling 5-match form and no xG yet. The feature extractor
    // uses season_per_90 whenever recent_form_per_90 is null.
    recent_form_per_90: null,
    season_per_90: goalsPer90,
    start_rate: null,
    xg_per_90: null,
    xa_per_90: null,
    xg_delta: null,
  };
}
|
||||
|
||||
/**
 * Fetch standings and scorers for one league, derive the team-defense and
 * per-player aggregates, and write everything to the cache.
 *
 * Standings and scorers are handled independently: if only one of the two
 * reads returned data, that half is still processed.
 *
 * @param {string} league - League code as accepted by the football-data
 *   adapter; lower-cased when building cache keys.
 * @param {{ dryRun?: boolean }} [options] - With `dryRun` set, everything
 *   is computed and counted but nothing is written to the cache.
 * @returns {Promise<{league: string, standings: number, scorers: number,
 *   players: number, teamDefense: number, skipped: boolean}>} Counts of rows
 *   seen / aggregates produced; `skipped` is true when neither read
 *   returned data.
 */
async function processLeague(league, { dryRun } = {}) {
  const summary = { league, standings: 0, scorers: 0, players: 0, teamDefense: 0, skipped: false };
  const leagueKey = league.toLowerCase();

  const [standings, scorers] = await Promise.all([
    fbd.getLeagueStandings(league),
    fbd.getLeagueScorers(league),
  ]);

  // Both null means "API unavailable" — mark skipped and bail for this
  // league. A single null is tolerated: the Array.isArray guards below
  // simply skip the missing half.
  if (standings === null && scorers === null) {
    summary.skipped = true;
    return summary;
  }

  // ---- Standings → team defensive aggregates ----
  // football-data wraps standings in groups; the overall table is the
  // entry with type === 'TOTAL'. Flatten the `table` rows of every TOTAL
  // entry so a competition with multiple groups (e.g. World Cup group
  // stage) feeds one combined rank table. Entries with another explicit
  // type (HOME / AWAY split tables) are skipped: they would list each team
  // again and overwrite its key with split-table numbers. Entries with no
  // `type` at all are kept.
  if (Array.isArray(standings)) {
    const allRows = [];
    for (const group of standings) {
      if (!Array.isArray(group?.table)) continue;
      if (group.type != null && group.type !== 'TOTAL') continue;
      for (const row of group.table) {
        if (row?.team?.name) allRows.push({ ...row, teamName: row.team.name });
      }
    }
    summary.standings = allRows.length;

    for (const row of allRows) {
      const agg = aggregateTeamDefense(row, allRows);
      if (!agg) continue;
      const key = `soccer:teamdefense:${leagueKey}:${row.teamName}`;
      if (!dryRun) await cacheSet(key, agg, DEFENSE_TTL_SEC);
      summary.teamDefense += 1;
    }
    // The raw payload is cached unfiltered so readers see what the API sent.
    if (!dryRun) await cacheSet(`soccer:${leagueKey}:standings`, standings, STANDINGS_TTL_SEC);
  }

  // ---- Scorers → per-player aggregates ----
  if (Array.isArray(scorers)) {
    summary.scorers = scorers.length;
    for (const s of scorers) {
      if (!s?.name) continue; // nameless rows cannot be keyed
      const profile = aggregatePlayerFromScorer(s);
      const key = `soccer:player:${normalizeName(s.name)}`;
      if (!dryRun) await cacheSet(key, profile, PLAYER_TTL_SEC);
      summary.players += 1;
    }
    if (!dryRun) await cacheSet(`soccer:${leagueKey}:scorers`, scorers, SCORERS_TTL_SEC);
  }

  return summary;
}
|
||||
|
||||
/**
 * Script entry point: parse flags, then prefetch each requested league in
 * turn, logging one line per league.
 *
 * A failure in one league is logged and recorded; it does not stop the
 * remaining leagues.
 *
 * @param {string[]} [argv=process.argv] - Argument vector handed to parseArgs.
 * @returns {Promise<{skipped: true} | {results: object[], elapsedSec: number}>}
 *   `{ skipped: true }` when the adapter reports no API key; otherwise one
 *   entry per league (a summary, or `{ league, error }`) plus the elapsed
 *   wall time in whole seconds.
 */
async function main(argv = process.argv) {
  const options = parseArgs(argv);
  const startedAt = Date.now();

  console.log(`[soccer-prefetch] starting — leagues=${options.leagues.join(',')} dry_run=${options.dryRun}`);

  // Without a key there is nothing to fetch; report and return normally.
  if (!fbd.hasApiKey()) {
    console.warn('[soccer-prefetch] FOOTBALL_DATA_API_KEY not set — skipping. WC fixtures still flow via the OSS API in poller/soccer.js; non-WC leagues are no-ops until the key is configured.');
    return { skipped: true };
  }

  const outcomes = [];
  // Leagues are processed one at a time, never concurrently.
  for (const code of options.leagues) {
    try {
      const report = await processLeague(code, options);
      outcomes.push(report);
      console.log(`[soccer-prefetch] ${code}: standings=${report.standings} scorers=${report.scorers} players=${report.players} teamDefense=${report.teamDefense} ${report.skipped ? '(skipped: no_data)' : ''}`);
    } catch (failure) {
      console.warn(`[soccer-prefetch] ${code} failed:`, failure.message);
      outcomes.push({ league: code, error: failure.message });
    }
  }

  const elapsedMs = Date.now() - startedAt;
  const elapsedSec = Math.round(elapsedMs / 1000);
  console.log(`[soccer-prefetch] done in ${elapsedSec}s — ${outcomes.length} leagues processed`);
  return { results: outcomes, elapsedSec };
}
|
||||
|
||||
// Run main() only when this file is executed directly (not when required).
// Exit 0 once main() resolves; log and exit 1 if it rejects.
if (require.main === module) {
  const exitCleanly = () => process.exit(0);
  const exitOnFatal = (err) => {
    console.error('[soccer-prefetch] fatal:', err);
    process.exit(1);
  };
  main().then(exitCleanly).catch(exitOnFatal);
}
|
||||
|
||||
// `main` is the public entry point. The helpers and TTL constants are
// grouped under `__internals` rather than exported at the top level.
const __internals = {
  parseArgs,
  aggregateTeamDefense,
  aggregatePlayerFromScorer,
  processLeague,
  PLAYER_TTL_SEC,
  STANDINGS_TTL_SEC,
  SCORERS_TTL_SEC,
  DEFENSE_TTL_SEC,
};

module.exports = { main, __internals };
|
||||
Reference in New Issue
Block a user