// vyndr/src/services/adapters/openRouterAdapter.js

/**
 * OpenRouter — LLM inference adapter (Engine 2).
 *
 * Primary:  DeepSeek V3 (deepseek/deepseek-chat) — best reasoning/dollar,
 *           returns clean JSON when asked nicely.
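 * Fallback: Nemotron (nvidia/llama-3.3-nemotron-super-49b-v1) — used when
 *           the primary call fails for any reason (429, 5xx, timeout, empty
 *           response, other HTTP error), except when the circuit breaker
 *           reports CIRCUIT_OPEN.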
 *
 * SECURITY POSTURE:
 *   - OPENROUTER_API_KEY is the most sensitive secret in this app. We
 *     accept the key from env and pass it as a Bearer header — it never
 *     appears in URLs, logs, or error messages we emit. Axios errors that
 *     wrap the request are caught inside analyze() and reduced to
 *     { code, status, message } before logging; they are never re-thrown.
 *   - We do NOT include the string 'VYNDR' in prompts. OpenRouter is a
 *     pass-through to third-party models and we don't want our brand
 *     name in their training/QA pipelines.
 *
 * EXPORTS:
 *   configured()                         → boolean
 *   analyze(systemMessage, userPrompt)   → { response, modelUsed, latencyMs }
 *                                          or null on total failure
 *   getUsage()                           → { requestsToday, requestsRemaining }
 */

const axios = require('axios');
const { createLimiter, createCircuitBreaker } = require('../../utils/rateLimiter');

// Tag used as the `[openrouter]` prefix on this adapter's log lines.
const SOURCE = 'openrouter';
// API root. OPENROUTER_BASE_URL overrides it; an unset or empty value falls
// back to the public endpoint.
const BASE_URL = process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1';
// Passed to axios as the per-request timeout, in milliseconds.
const HTTP_TIMEOUT_MS = 30_000;

// Model ids for the first attempt and for the single retry. Each can be
// overridden through its env var.
const PRIMARY_MODEL = process.env.OPENROUTER_PRIMARY_MODEL || 'deepseek/deepseek-chat';
const FALLBACK_MODEL = process.env.OPENROUTER_FALLBACK_MODEL || 'nvidia/llama-3.3-nemotron-super-49b-v1';

// 20 req/min, 1000/day. The day counter is in-memory; it resets on process
// restart. That's good enough for free-tier accounting — we hit the cap
// well before midnight in normal traffic patterns.
// NOTE(review): the exact semantics of these options (token refill, when the
// breaker opens/resets) live in ../../utils/rateLimiter — confirm there.
const limiter = createLimiter({ tokensPerInterval: 20, interval: 60_000 });
const breaker = createCircuitBreaker({ failureThreshold: 3, resetTimeout: 60_000 });

// Once this many requests have been counted for the day, analyze() stops
// calling out. Only successful calls are counted.
const DAILY_CAP = 1000;
// Shared mutable counter. dayBucket is the calendar date the count belongs
// to, as YYYY-MM-DD taken from toISOString() — i.e. a UTC date, not local.
const usage = { requestsToday: 0, dayBucket: new Date().toISOString().slice(0, 10) };

/**
 * Record one request against the in-memory daily counter.
 *
 * The counter is keyed by the UTC calendar date (`YYYY-MM-DD`). When that
 * date differs from the stored bucket, the bucket moves to the new date and
 * this call becomes request number one of the day; otherwise the existing
 * count is incremented.
 */
function noteUsage() {
  const currentDay = new Date().toISOString().slice(0, 10);
  const sameDay = currentDay === usage.dayBucket;
  if (!sameDay) {
    usage.dayBucket = currentDay;
  }
  usage.requestsToday = sameDay ? usage.requestsToday + 1 : 1;
}

/**
 * Report whether an OpenRouter API key is present in the environment.
 *
 * @returns {boolean} true when OPENROUTER_API_KEY is set to a non-empty value.
 */
function configured() {
  const apiKey = process.env.OPENROUTER_API_KEY;
  return Boolean(apiKey);
}

/**
 * Snapshot of today's request accounting.
 *
 * A count stored under an earlier UTC date is stale — nothing has been
 * recorded for the current day yet — so it is reported as zero instead of
 * being echoed back. The shared counter itself is not modified here.
 *
 * @returns {{ requestsToday: number, requestsRemaining: number }}
 *   requestsRemaining is clamped so it never goes below zero.
 */
function getUsage() {
  const today = new Date().toISOString().slice(0, 10);
  const requestsToday = usage.dayBucket === today ? usage.requestsToday : 0;
  return {
    requestsToday,
    requestsRemaining: Math.max(0, DAILY_CAP - requestsToday),
  };
}

/**
 * Reduce an error to the three fields that are safe to log.
 *
 * Axios errors carry the request headers, body, and full URL, any of which
 * may contain the API key, so nothing else from the error is copied over.
 *
 * @param {*} err - whatever was thrown; null/undefined are tolerated.
 * @returns {{ code: *, status: *, message: string }} `message` falls back to
 *   'unknown' when the error has none.
 */
function scrubError(err) {
  const { code, response, message } = err ?? {};
  return {
    code,
    status: response?.status,
    message: message || 'unknown',
  };
}

/**
 * Send one chat-completion request to OpenRouter for a single model.
 *
 * @param {string} model - OpenRouter model id to run.
 * @param {string} systemMessage - content of the system message.
 * @param {string} userPrompt - content of the user message.
 * @returns {Promise<{ response: string, modelUsed: string, latencyMs: number }>}
 *   the first choice's message content, the model id that was requested, and
 *   the wall-clock time spent in this call.
 * @throws {Error} with `code` 'OPENROUTER_429' (rate limited),
 *   'OPENROUTER_5XX' (upstream server error) or 'OPENROUTER_EMPTY' (no usable
 *   content in the response). Errors raised by axios itself (timeouts,
 *   network failures, statuses rejected by validateStatus) propagate
 *   unmodified and may carry the request headers — scrub them before logging.
 */
async function callModel(model, systemMessage, userPrompt) {
  const start = Date.now();
  const body = {
    model,
    messages: [
      { role: 'system', content: systemMessage },
      { role: 'user', content: userPrompt },
    ],
    temperature: 0.1,
    max_tokens: 500,
    // response_format works on OpenAI-compatible endpoints; harmless if a
    // model ignores it. We still validate the response ourselves.
    response_format: { type: 'json_object' },
  };
  const res = await axios.post(`${BASE_URL}/chat/completions`, body, {
    headers: {
      Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`,
      'Content-Type': 'application/json',
      // OpenRouter recommends setting referer + title for usage tracking.
      // The title default is generic; the referer default is the app's own
      // domain, so set OPENROUTER_REFERER if that should not be sent.
      'HTTP-Referer': process.env.OPENROUTER_REFERER || 'https://vyndr.app',
      'X-Title': process.env.OPENROUTER_TITLE || 'Sports Analytics',
    },
    timeout: HTTP_TIMEOUT_MS,
    // Let 429 and 5xx resolve instead of reject so they can be turned into
    // small coded errors below rather than axios errors.
    validateStatus: (s) => (s >= 200 && s < 300) || s === 429 || (s >= 500 && s < 600),
  });

  // Plain Error + code: carries nothing from the request config.
  const fail = (message, code) => Object.assign(new Error(message), { code });

  if (res.status === 429) {
    throw fail('openrouter rate limited', 'OPENROUTER_429');
  }
  if (res.status >= 500) {
    throw fail(`openrouter 5xx (${res.status})`, 'OPENROUTER_5XX');
  }

  const content = res.data?.choices?.[0]?.message?.content;
  // Whitespace-only output is as useless as no output; reporting it as empty
  // lets the caller move on to the fallback model.
  const isBlank = typeof content === 'string' && content.trim() === '';
  if (!content || isBlank) {
    throw fail('openrouter empty response', 'OPENROUTER_EMPTY');
  }
  return { response: content, modelUsed: model, latencyMs: Date.now() - start };
}

/**
 * Run a prompt through the primary model, retrying once on the fallback.
 *
 * Returns null — without calling out — when the adapter is not configured,
 * when either argument is not a string, or when today's request cap has been
 * reached. Also returns null when the circuit breaker is open, or when both
 * the primary and the fallback call fail. Failures are logged in scrubbed
 * form only. Only successful calls are added to the daily counter.
 *
 * @param {string} systemMessage - content of the system message.
 * @param {string} userPrompt - content of the user message.
 * @returns {Promise<{ response: string, modelUsed: string, latencyMs: number } | null>}
 */
async function analyze(systemMessage, userPrompt) {
  if (!configured()) return null;
  if (typeof systemMessage !== 'string' || typeof userPrompt !== 'string') return null;

  // Roll the counter over to the current UTC day BEFORE checking the cap.
  // The counter is otherwise only reset when a successful call is recorded,
  // which can never happen again once the cap check below starts rejecting —
  // so without this the adapter would stay capped until the process restarts.
  const today = new Date().toISOString().slice(0, 10);
  if (today !== usage.dayBucket) {
    usage.dayBucket = today;
    usage.requestsToday = 0;
  }
  if (usage.requestsToday >= DAILY_CAP) {
    console.warn(`[${SOURCE}] daily cap reached (${DAILY_CAP})`);
    return null;
  }
  await limiter.waitForToken();

  // Try primary; on failure, retry once with the fallback model.
  try {
    const result = await breaker.call(() => callModel(PRIMARY_MODEL, systemMessage, userPrompt));
    noteUsage();
    return result;
  } catch (primaryErr) {
    if (primaryErr?.code === 'CIRCUIT_OPEN') {
      // Don't burn the second model when the breaker says everything is down.
      return null;
    }
    // Log only the scrubbed form — the raw error may carry request headers.
    console.warn(`[${SOURCE}] primary failed:`, scrubError(primaryErr));
    try {
      // Fallback bypasses the breaker — different model, different upstream.
      const result = await callModel(FALLBACK_MODEL, systemMessage, userPrompt);
      noteUsage();
      return result;
    } catch (fallbackErr) {
      console.warn(`[${SOURCE}] fallback also failed:`, scrubError(fallbackErr));
      return null;
    }
  }
}

module.exports = {
  configured,
  analyze,
  getUsage,
  __internals: { limiter, breaker, callModel, scrubError, PRIMARY_MODEL, FALLBACK_MODEL, usage },
};