Sessions 5-7a: 955 tests, deployment ready
This commit is contained in:
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
seed_historical.py — One-time historical data seeder for VYNDR.
|
||||
Run ONCE before launch to backfill coaching and player-out data.
|
||||
|
||||
Usage:
|
||||
python scripts/seed_historical.py
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
|
||||
# Allow imports from src/services/python
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src', 'services', 'python'))
|
||||
|
||||
from coaching_parser import parse_nba_coaching_from_game_id, parse_mlb_coaching_from_game_id
|
||||
from player_outs import find_and_log_historical_player_outs
|
||||
|
||||
# Root logging setup for the seeding run: INFO level, with each record
# rendered as "<timestamp> [<LEVEL>] <message>".
_LOG_FORMAT = '%(asctime)s [%(levelname)s] %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger shared by every seeding step below.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def seed_nba_coaching(season='2024-25'):
    """Seed NBA coaching data for every regular-season game in a season.

    Fetches the league game log for ``season`` through ``nba_api``'s
    ``LeagueGameLog`` endpoint and calls ``parse_nba_coaching_from_game_id``
    once per unique ``GAME_ID``. A failure on a single game is logged with
    its traceback and does not stop the run; failed game IDs are collected
    and reported in a warning once the loop finishes.

    Args:
        season: Season string passed straight through to ``LeagueGameLog``
            (default ``'2024-25'``).

    Raises:
        Exception: Anything raised while fetching the game log itself;
            only per-game parse errors are caught here.
    """
    # Function-scope import: nba_api is only needed when this step runs.
    from nba_api.stats.endpoints import LeagueGameLog

    logger.info(f"Fetching NBA game log for season {season}...")
    game_log = LeagueGameLog(season=season, season_type_all_star='Regular Season')
    df = game_log.get_data_frames()[0]

    # unique() collapses repeated GAME_ID rows so each game is parsed once.
    game_ids = df['GAME_ID'].unique()
    total = len(game_ids)
    logger.info(f"Found {total} unique NBA games to process.")

    failed_ids = []
    for i, game_id in enumerate(game_ids, start=1):
        try:
            parse_nba_coaching_from_game_id(game_id)
        except Exception as e:
            # Best-effort seeding: keep going, but keep the traceback and
            # remember the ID so the failure can be retried afterwards.
            logger.exception(f"NBA game {game_id} failed: {e}")
            failed_ids.append(game_id)

        if i % 50 == 0:
            logger.info(f"NBA coaching progress: {i}/{total} games processed")

        # Pause between games — presumably to stay under the stats API's
        # rate limit; confirm before lowering.
        time.sleep(0.6)

    logger.info(f"NBA coaching seed complete. {total} games processed.")
    if failed_ids:
        # The line above counts attempts, not successes; spell out failures.
        logger.warning(
            f"NBA coaching seed: {len(failed_ids)}/{total} games failed: "
            f"{', '.join(str(g) for g in failed_ids)}"
        )
|
||||
|
||||
|
||||
def seed_mlb_coaching(season=2024, start_date=None, end_date=None):
    """Seed MLB coaching data for every scheduled game in a date window.

    Fetches the schedule between ``start_date`` and ``end_date`` through
    ``statsapi.schedule`` and calls ``parse_mlb_coaching_from_game_id`` once
    per distinct ``game_id``. A failure on a single game is logged with its
    traceback and does not stop the run; failed game IDs are collected and
    reported in a warning once the loop finishes.

    Args:
        season: Year used to build the default date window (default 2024).
        start_date: First date to fetch, as ``'YYYY-MM-DD'``. Defaults to
            ``'{season}-03-28'``.
        end_date: Last date to fetch, as ``'YYYY-MM-DD'``. Defaults to
            ``'{season}-09-29'``.

    Raises:
        Exception: Anything raised while fetching the schedule itself;
            only per-game parse errors are caught here.
    """
    # Function-scope import: statsapi is only needed when this step runs.
    import statsapi

    # NOTE(review): the default month/day pair is fixed regardless of
    # `season`. It looks chosen for the 2024 regular season — confirm, and
    # pass explicit dates when seeding any other year.
    if start_date is None:
        start_date = f'{season}-03-28'
    if end_date is None:
        end_date = f'{season}-09-29'

    logger.info(f"Fetching MLB schedule for {start_date} to {end_date}...")
    schedule = statsapi.schedule(start_date=start_date, end_date=end_date)

    # De-duplicate while preserving schedule order, mirroring the unique()
    # call in the NBA seeder. Presumably a rescheduled or resumed game can
    # appear on more than one date — TODO confirm; harmless if not.
    game_ids = list(dict.fromkeys(g['game_id'] for g in schedule))
    total = len(game_ids)
    logger.info(f"Found {total} MLB games to process.")

    failed_ids = []
    for i, game_id in enumerate(game_ids, start=1):
        try:
            parse_mlb_coaching_from_game_id(game_id)
        except Exception as e:
            # Best-effort seeding: keep going, but keep the traceback and
            # remember the ID so the failure can be retried afterwards.
            logger.exception(f"MLB game {game_id} failed: {e}")
            failed_ids.append(game_id)

        if i % 100 == 0:
            logger.info(f"MLB coaching progress: {i}/{total} games processed")

        # Pause between games — presumably to avoid hammering the stats
        # API; confirm before lowering.
        time.sleep(0.3)

    logger.info(f"MLB coaching seed complete. {total} games processed.")
    if failed_ids:
        # The line above counts attempts, not successes; spell out failures.
        logger.warning(
            f"MLB coaching seed: {len(failed_ids)}/{total} games failed: "
            f"{', '.join(str(g) for g in failed_ids)}"
        )
|
||||
|
||||
|
||||
def seed_player_out_history(season='2024-25'):
    """Backfill historical player-out records for one season.

    Delegates the work to ``find_and_log_historical_player_outs``. Any
    exception it raises is logged at ERROR level and then re-raised.

    Args:
        season: Season string forwarded unchanged to the helper.

    Raises:
        Exception: Whatever the helper raised, after it has been logged.
    """
    logger.info(f"Seeding player-out history for season {season}...")

    try:
        find_and_log_historical_player_outs(season=season)
        logger.info("Player-out history seed complete.")
    except Exception as exc:
        # Record the failure in the seeder's own log, then let it propagate.
        failure_message = f"Player-out history seed failed: {exc}"
        logger.error(failure_message)
        raise
|
||||
|
||||
|
||||
def main():
    """Run the three historical seeding steps in order.

    Steps, each announced with a banner log line before it starts:
    NBA coaching, MLB coaching, then player-out history. An exception from
    any step propagates and stops the remaining steps.
    """
    logger.info("=== VYNDR Historical Data Seeder ===")
    logger.info("This should be run ONCE before launch.\n")

    # (banner, step) pairs; the lambdas defer each call until its turn.
    pipeline = (
        ("--- Step 1/3: NBA Coaching ---",
         lambda: seed_nba_coaching(season='2024-25')),
        ("--- Step 2/3: MLB Coaching ---",
         lambda: seed_mlb_coaching(season=2024)),
        ("--- Step 3/3: Player-Out History ---",
         lambda: seed_player_out_history(season='2024-25')),
    )
    for banner, run_step in pipeline:
        logger.info(banner)
        run_step()

    logger.info("=== All historical seeds complete. ===")


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user