# vyndr/scripts/seed_historical.py — 106 lines, 3.3 KiB, Python

"""
seed_historical.py — One-time historical data seeder for VYNDR.
Run ONCE before launch to backfill coaching and player-out data.
Usage:
python scripts/seed_historical.py
"""
import sys
import os
import time
import logging
# Allow imports from src/services/python
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src', 'services', 'python'))
from coaching_parser import parse_nba_coaching_from_game_id, parse_mlb_coaching_from_game_id
from player_outs import find_and_log_historical_player_outs
# Configure the root logger once for the whole script: INFO and above, with
# each record rendered as "<timestamp> [<LEVEL>] <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s'
)
# Module-level logger used by every seeder below; it is named after this
# module (``__main__`` when the file is executed as a script).
logger = logging.getLogger(__name__)
def seed_nba_coaching(season='2024-25', delay=0.6, progress_every=50):
    """Seed NBA coaching data for every regular-season game of a season.

    Fetches the league game log through ``nba_api`` (``LeagueGameLog`` with
    ``season_type_all_star='Regular Season'``), takes the distinct
    ``GAME_ID`` values from its first data frame and passes each one to
    ``parse_nba_coaching_from_game_id``. A game whose parse raises is logged
    with its traceback and skipped, so one bad game does not abort the whole
    backfill; the skipped games are summarised in a warning at the end.

    Args:
        season: Season identifier forwarded to ``LeagueGameLog``
            (for example ``'2024-25'``).
        delay: Seconds to sleep after each game. Defaults to 0.6, the value
            that was previously hard-coded.
        progress_every: Emit a progress line every this many games.
            Defaults to 50, the value that was previously hard-coded;
            ``0`` or ``None`` disables progress lines.

    Returns:
        None. Results are whatever ``parse_nba_coaching_from_game_id``
        records; this function only drives it and logs.
    """
    # Local import: nba_api is only needed when this seeder actually runs.
    from nba_api.stats.endpoints import LeagueGameLog

    logger.info(f"Fetching NBA game log for season {season}...")
    game_log = LeagueGameLog(season=season, season_type_all_star='Regular Season')
    df = game_log.get_data_frames()[0]
    # unique() collapses repeated GAME_ID values so each game is parsed once.
    game_ids = df['GAME_ID'].unique()
    total = len(game_ids)
    logger.info(f"Found {total} unique NBA games to process.")

    failed_ids = []
    for i, game_id in enumerate(game_ids, start=1):
        try:
            parse_nba_coaching_from_game_id(game_id)
        except Exception as e:
            # Deliberate best-effort: keep seeding, but keep the traceback
            # and remember the game so the failure is not silently lost.
            logger.exception(f"NBA game {game_id} failed: {e}")
            failed_ids.append(game_id)

        if progress_every and i % progress_every == 0:
            logger.info(f"NBA coaching progress: {i}/{total} games processed")

        # Pause between games to pace the per-game requests.
        time.sleep(delay)

    logger.info(f"NBA coaching seed complete. {total} games processed.")
    if failed_ids:
        failed_list = ', '.join(str(game_id) for game_id in failed_ids)
        logger.warning(
            f"NBA coaching seed: {len(failed_ids)} of {total} games failed "
            f"and were skipped: {failed_list}"
        )
def seed_mlb_coaching(season=2024, start_date=None, end_date=None,
                      delay=0.3, progress_every=100):
    """Seed MLB coaching data for every scheduled game in a date window.

    Fetches the schedule through ``statsapi.schedule`` and passes each
    distinct ``game_id`` to ``parse_mlb_coaching_from_game_id``. A game whose
    parse raises is logged with its traceback and skipped, so one bad game
    does not abort the whole backfill; the skipped games are summarised in a
    warning at the end.

    Args:
        season: Year used to build the default date window.
        start_date: First schedule date (``'YYYY-MM-DD'``). Defaults to
            ``f'{season}-03-28'``, the value that was previously hard-coded.
        end_date: Last schedule date (``'YYYY-MM-DD'``). Defaults to
            ``f'{season}-09-29'``, the value that was previously hard-coded.
        delay: Seconds to sleep after each game. Defaults to 0.3, the value
            that was previously hard-coded.
        progress_every: Emit a progress line every this many games.
            Defaults to 100, the value that was previously hard-coded;
            ``0`` or ``None`` disables progress lines.

    Returns:
        None. Results are whatever ``parse_mlb_coaching_from_game_id``
        records; this function only drives it and logs.
    """
    # Local import: statsapi is only needed when this seeder actually runs.
    import statsapi

    # NOTE(review): the default 03-28..09-29 window looks tailored to the
    # 2024 regular season -- pass explicit dates when seeding another year.
    if start_date is None:
        start_date = f'{season}-03-28'
    if end_date is None:
        end_date = f'{season}-09-29'

    logger.info(f"Fetching MLB schedule for {start_date} to {end_date}...")
    schedule = statsapi.schedule(start_date=start_date, end_date=end_date)
    # De-duplicate while keeping first-seen order so each game is parsed
    # once, mirroring the unique-id handling in the NBA seeder. Presumably a
    # rescheduled game can appear under more than one date -- TODO confirm.
    game_ids = list(dict.fromkeys(g['game_id'] for g in schedule))
    total = len(game_ids)
    logger.info(f"Found {total} MLB games to process.")

    failed_ids = []
    for i, game_id in enumerate(game_ids, start=1):
        try:
            parse_mlb_coaching_from_game_id(game_id)
        except Exception as e:
            # Deliberate best-effort: keep seeding, but keep the traceback
            # and remember the game so the failure is not silently lost.
            logger.exception(f"MLB game {game_id} failed: {e}")
            failed_ids.append(game_id)

        if progress_every and i % progress_every == 0:
            logger.info(f"MLB coaching progress: {i}/{total} games processed")

        # Pause between games to pace the per-game requests.
        time.sleep(delay)

    logger.info(f"MLB coaching seed complete. {total} games processed.")
    if failed_ids:
        failed_list = ', '.join(str(game_id) for game_id in failed_ids)
        logger.warning(
            f"MLB coaching seed: {len(failed_ids)} of {total} games failed "
            f"and were skipped: {failed_list}"
        )
def seed_player_out_history(season='2024-25'):
    """Backfill historical player-out records for one season.

    Delegates the work to ``find_and_log_historical_player_outs`` and logs
    the start and the outcome around it.

    Args:
        season: Season identifier forwarded unchanged to
            ``find_and_log_historical_player_outs``.

    Raises:
        Exception: Whatever the delegate raises is logged and then re-raised
            unchanged, so the caller sees the original error.
    """
    logger.info(f"Seeding player-out history for season {season}...")
    try:
        find_and_log_historical_player_outs(season=season)
        logger.info("Player-out history seed complete.")
    except Exception as err:
        # Record the failure in the seeder log, then propagate it untouched.
        logger.error(f"Player-out history seed failed: {err}")
        raise
if __name__ == '__main__':
    # Script entry point: run the three seeders in a fixed order. An
    # exception escaping any step stops the run before the later steps.
    logger.info("=== VYNDR Historical Data Seeder ===")
    logger.info("This should be run ONCE before launch.\n")

    # (banner to log, seeder to call, season argument) for each step.
    seed_steps = (
        ("--- Step 1/3: NBA Coaching ---", seed_nba_coaching, '2024-25'),
        ("--- Step 2/3: MLB Coaching ---", seed_mlb_coaching, 2024),
        ("--- Step 3/3: Player-Out History ---", seed_player_out_history, '2024-25'),
    )
    for banner, run_seed, season_arg in seed_steps:
        logger.info(banner)
        run_seed(season=season_arg)

    logger.info("=== All historical seeds complete. ===")