/**
 * OpenRouter — LLM inference adapter (Engine 2).
 *
 * Models:
 *   - Primary:  DeepSeek V3 (deepseek/deepseek-chat) — best reasoning/dollar,
 *     returns clean JSON when asked nicely.
 *   - Fallback: Nemotron (nvidia/llama-3.3-nemotron-super-49b-v1) — tried when
 *     the primary 429s, 5xxs, or times out.
 *
 * SECURITY POSTURE:
 *   - OPENROUTER_API_KEY is the most sensitive secret in this app. It is read
 *     from the environment and sent only as a Bearer header, so it never
 *     appears in URLs. Request errors caught inside analyze() are reduced to
 *     { code, status, message } before they are logged, which keeps request
 *     headers out of the logs.
 *   - We do NOT include the string 'VYNDR' in prompts. OpenRouter is a
 *     pass-through to third-party models and we don't want our brand name in
 *     their training/QA pipelines.
 *
 * EXPORTS:
 *   configured()                      → boolean
 *   analyze(systemMessage, userPrompt) → { response, modelUsed, latencyMs },
 *                                        or null on total failure
 *   getUsage()                        → { requestsToday, requestsRemaining }
 */

const axios = require('axios');
const { createLimiter, createCircuitBreaker } = require('../../utils/rateLimiter');

// Optional overrides, captured once at module load. An unset or empty value
// falls back to the built-in default below.
const {
  OPENROUTER_BASE_URL,
  OPENROUTER_PRIMARY_MODEL,
  OPENROUTER_FALLBACK_MODEL,
} = process.env;

// Tag used as the prefix of this module's log lines.
const SOURCE = 'openrouter';

// Per-request HTTP timeout, in milliseconds.
const HTTP_TIMEOUT_MS = 30_000;

const BASE_URL = OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1';
const PRIMARY_MODEL = OPENROUTER_PRIMARY_MODEL || 'deepseek/deepseek-chat';
const FALLBACK_MODEL = OPENROUTER_FALLBACK_MODEL || 'nvidia/llama-3.3-nemotron-super-49b-v1';

// 20 req/min, 1000/day. The day counter is in-memory; it resets on process
// restart. That's good enough for free-tier accounting — we hit the cap
// well before midnight in normal traffic patterns.
const limiter = createLimiter({ tokensPerInterval: 20, interval: 60_000 }); const breaker = createCircuitBreaker({ failureThreshold: 3, resetTimeout: 60_000 }); const DAILY_CAP = 1000; const usage = { requestsToday: 0, dayBucket: new Date().toISOString().slice(0, 10) }; function noteUsage() { const today = new Date().toISOString().slice(0, 10); if (today !== usage.dayBucket) { usage.dayBucket = today; usage.requestsToday = 0; } usage.requestsToday += 1; } function configured() { return !!process.env.OPENROUTER_API_KEY; } function getUsage() { return { requestsToday: usage.requestsToday, requestsRemaining: Math.max(0, DAILY_CAP - usage.requestsToday), }; } // Scrub axios errors before anything user-facing — the headers, request // body, and full URL may contain the key. function scrubError(err) { return { code: err?.code, status: err?.response?.status, message: err?.message || 'unknown', }; } async function callModel(model, systemMessage, userPrompt) { const start = Date.now(); const body = { model, messages: [ { role: 'system', content: systemMessage }, { role: 'user', content: userPrompt }, ], temperature: 0.1, max_tokens: 500, // response_format works on OpenAI-compatible endpoints; harmless if a // model ignores it. We still validate the response ourselves. response_format: { type: 'json_object' }, }; const res = await axios.post(`${BASE_URL}/chat/completions`, body, { headers: { Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`, 'Content-Type': 'application/json', // OpenRouter recommends setting referer + title for usage tracking. // Neither contains 'VYNDR' branding — they're generic per their docs. 
'HTTP-Referer': process.env.OPENROUTER_REFERER || 'https://vyndr.app', 'X-Title': process.env.OPENROUTER_TITLE || 'Sports Analytics', }, timeout: HTTP_TIMEOUT_MS, validateStatus: (s) => (s >= 200 && s < 300) || s === 429 || (s >= 500 && s < 600), }); if (res.status === 429) { const err = new Error('openrouter rate limited'); err.code = 'OPENROUTER_429'; throw err; } if (res.status >= 500) { const err = new Error(`openrouter 5xx (${res.status})`); err.code = 'OPENROUTER_5XX'; throw err; } const content = res.data?.choices?.[0]?.message?.content; if (!content) { const err = new Error('openrouter empty response'); err.code = 'OPENROUTER_EMPTY'; throw err; } return { response: content, modelUsed: model, latencyMs: Date.now() - start }; } async function analyze(systemMessage, userPrompt) { if (!configured()) return null; if (typeof systemMessage !== 'string' || typeof userPrompt !== 'string') return null; if (usage.requestsToday >= DAILY_CAP) { console.warn(`[${SOURCE}] daily cap reached (${DAILY_CAP})`); return null; } await limiter.waitForToken(); // Try primary; on failure, retry once with the fallback model. try { const result = await breaker.call(() => callModel(PRIMARY_MODEL, systemMessage, userPrompt)); noteUsage(); return result; } catch (primaryErr) { const scrubbed = scrubError(primaryErr); if (primaryErr?.code === 'CIRCUIT_OPEN') { // Don't burn the second model when the breaker says everything is down. return null; } console.warn(`[${SOURCE}] primary failed:`, scrubbed); try { // Fallback bypasses the breaker — different model, different upstream. const result = await callModel(FALLBACK_MODEL, systemMessage, userPrompt); noteUsage(); return result; } catch (fallbackErr) { console.warn(`[${SOURCE}] fallback also failed:`, scrubError(fallbackErr)); return null; } } } module.exports = { configured, analyze, getUsage, __internals: { limiter, breaker, callModel, scrubError, PRIMARY_MODEL, FALLBACK_MODEL, usage }, };