106 lines
3.3 KiB
Python
106 lines
3.3 KiB
Python
"""
seed_historical.py — One-time historical data seeder for VYNDR.
Run ONCE before launch to backfill coaching and player-out data.

Usage:
    python scripts/seed_historical.py
"""
|
|
|
|
import sys
|
|
import os
|
|
import time
|
|
import logging
|
|
|
|
# Allow imports from src/services/python
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src', 'services', 'python'))
|
|
|
|
from coaching_parser import parse_nba_coaching_from_game_id, parse_mlb_coaching_from_game_id
|
|
from player_outs import find_and_log_historical_player_outs
|
|
|
|
# Configure logging once for the whole run: INFO and above, each record
# prefixed with a timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by every seeding step below.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def seed_nba_coaching(season='2024-25'):
    """Seed NBA coaching data for every regular-season game in ``season``.

    Fetches the league game log through ``nba_api``'s ``LeagueGameLog``,
    collects the distinct ``GAME_ID`` values and calls
    ``parse_nba_coaching_from_game_id`` once per game.  A failure on a single
    game is logged with its traceback and counted, and the run continues with
    the next game; the final summary reports how many games failed.

    Args:
        season: Season string passed unchanged to ``LeagueGameLog``
            (default ``'2024-25'``).

    Raises:
        Exception: Errors raised while fetching the game log itself are not
            caught here and propagate to the caller; only per-game parse
            errors are contained.
    """
    # Imported lazily so the module can be loaded without nba_api installed.
    from nba_api.stats.endpoints import LeagueGameLog

    logger.info(f"Fetching NBA game log for season {season}...")
    game_log = LeagueGameLog(season=season, season_type_all_star='Regular Season')
    df = game_log.get_data_frames()[0]

    # The log can list the same game more than once (presumably one row per
    # team — TODO confirm), so reduce it to distinct ids.
    game_ids = df['GAME_ID'].unique()
    total = len(game_ids)
    logger.info(f"Found {total} unique NBA games to process.")

    failed = 0
    for i, game_id in enumerate(game_ids, start=1):
        try:
            parse_nba_coaching_from_game_id(game_id)
        except Exception as e:
            # Best-effort seeding: record the failure (with traceback) and
            # keep going so one bad game does not abort the whole backfill.
            failed += 1
            logger.exception(f"NBA game {game_id} failed: {e}")

        if i % 50 == 0:
            logger.info(f"NBA coaching progress: {i}/{total} games processed")

        # Pause between requests (presumably to respect the stats API rate
        # limit); there is nothing to wait for after the final game.
        if i < total:
            time.sleep(0.6)

    logger.info(
        f"NBA coaching seed complete. {total} games processed "
        f"({total - failed} succeeded, {failed} failed)."
    )
|
|
|
|
|
|
def seed_mlb_coaching(season=2024, *, start_date=None, end_date=None):
    """Seed MLB coaching data for every scheduled game in a season window.

    Fetches the schedule through ``statsapi.schedule`` for the given date
    window, collects the distinct ``game_id`` values and calls
    ``parse_mlb_coaching_from_game_id`` once per game.  A failure on a single
    game is logged with its traceback and counted, and the run continues with
    the next game; the final summary reports how many games failed.

    Args:
        season: Season year used to build the default date window
            (default ``2024``).
        start_date: Optional first day of the window.  Defaults to
            ``f'{season}-03-28'``.
        end_date: Optional last day of the window.  Defaults to
            ``f'{season}-09-29'``.

    Raises:
        Exception: Errors raised while fetching the schedule itself are not
            caught here and propagate to the caller; only per-game parse
            errors are contained.
    """
    # Imported lazily so the module can be loaded without statsapi installed.
    import statsapi

    # NOTE(review): the default window looks tailored to the 2024 calendar;
    # pass start_date/end_date explicitly when seeding another season.
    if start_date is None:
        start_date = f'{season}-03-28'
    if end_date is None:
        end_date = f'{season}-09-29'

    logger.info(f"Fetching MLB schedule for {start_date} to {end_date}...")
    schedule = statsapi.schedule(start_date=start_date, end_date=end_date)

    # The schedule may list the same game_id more than once; de-duplicate
    # (keeping first-seen order) so each game is parsed exactly once, the
    # same guarantee the NBA seeder gets from ``.unique()``.
    game_ids = list(dict.fromkeys(g['game_id'] for g in schedule))
    total = len(game_ids)
    logger.info(f"Found {total} MLB games to process.")

    failed = 0
    for i, game_id in enumerate(game_ids, start=1):
        try:
            parse_mlb_coaching_from_game_id(game_id)
        except Exception as e:
            # Best-effort seeding: record the failure (with traceback) and
            # keep going so one bad game does not abort the whole backfill.
            failed += 1
            logger.exception(f"MLB game {game_id} failed: {e}")

        if i % 100 == 0:
            logger.info(f"MLB coaching progress: {i}/{total} games processed")

        # Pause between requests (presumably to be gentle on the stats API);
        # there is nothing to wait for after the final game.
        if i < total:
            time.sleep(0.3)

    logger.info(
        f"MLB coaching seed complete. {total} games processed "
        f"({total - failed} succeeded, {failed} failed)."
    )
|
|
|
|
|
|
def seed_player_out_history(season='2024-25'):
    """Backfill the historical player-out log for one season.

    Delegates all of the work to ``find_and_log_historical_player_outs``.
    If that call raises, the error is logged and then re-raised so the
    caller sees the failure.

    Args:
        season: Season string forwarded as the ``season`` keyword
            (default ``'2024-25'``).
    """
    logger.info(f"Seeding player-out history for season {season}...")

    try:
        find_and_log_historical_player_outs(season=season)
    except Exception as exc:
        # Log for the run record, then let the failure propagate.
        logger.error(f"Player-out history seed failed: {exc}")
        raise
    else:
        logger.info("Player-out history seed complete.")
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: run the three seeding steps in a fixed order.
    logger.info("=== VYNDR Historical Data Seeder ===")
    logger.info("This should be run ONCE before launch.\n")

    # (banner, seeding function, season argument) for each step, in run order.
    seed_steps = (
        ("--- Step 1/3: NBA Coaching ---", seed_nba_coaching, '2024-25'),
        ("--- Step 2/3: MLB Coaching ---", seed_mlb_coaching, 2024),
        ("--- Step 3/3: Player-Out History ---", seed_player_out_history, '2024-25'),
    )
    for banner, run_step, step_season in seed_steps:
        logger.info(banner)
        run_step(season=step_season)

    logger.info("=== All historical seeds complete. ===")
|