158 lines
5.6 KiB
JavaScript
158 lines
5.6 KiB
JavaScript
/**
 * OpenRouter — LLM inference adapter (Engine 2).
 *
 * Primary: DeepSeek V3 (deepseek/deepseek-chat) — best reasoning/dollar,
 *   returns clean JSON when asked nicely.
 * Fallback: Nemotron (nvidia/llama-3.3-nemotron-super-49b-v1) — used when
 *   primary 429s, 5xxs, or times out.
 *
 * SECURITY POSTURE:
 *   - OPENROUTER_API_KEY is the most sensitive secret in this app. We
 *     accept the key from env and pass it as a Bearer header — it never
 *     appears in URLs, logs, or error messages we emit. Axios errors that
 *     wrap the request are caught before re-throw to scrub headers.
 *   - We do NOT include the string 'VYNDR' in prompts. OpenRouter is a
 *     pass-through to third-party models and we don't want our brand
 *     name in their training/QA pipelines.
 *
 * EXPORTS:
 *   configured() → boolean
 *   analyze(systemMessage, userPrompt) → { response, modelUsed, latencyMs }
 *                                        or null on total failure
 *   getUsage() → { requestsToday, requestsRemaining }
 */
|
|
|
|
const axios = require('axios');
|
|
const { createLimiter, createCircuitBreaker } = require('../../utils/rateLimiter');
|
|
|
|
// ---------------------------------------------------------------------------
// Module configuration and shared state.
// ---------------------------------------------------------------------------

// Tag used as the `[openrouter]` prefix on log lines.
const SOURCE = 'openrouter';

// Per-request HTTP timeout handed to axios.
const HTTP_TIMEOUT_MS = 30 * 1000;

// Maximum number of counted requests per day bucket.
const DAILY_CAP = 1000;

// Endpoint root and model ids; each can be overridden through the environment.
const BASE_URL = process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1';
const PRIMARY_MODEL = process.env.OPENROUTER_PRIMARY_MODEL || 'deepseek/deepseek-chat';
const FALLBACK_MODEL =
  process.env.OPENROUTER_FALLBACK_MODEL || 'nvidia/llama-3.3-nemotron-super-49b-v1';

// Throttling: 20 requests per minute through the limiter, and a breaker
// configured with a failure threshold of 3 and a 60 s reset timeout.
const limiter = createLimiter({ tokensPerInterval: 20, interval: 60 * 1000 });
const breaker = createCircuitBreaker({ failureThreshold: 3, resetTimeout: 60 * 1000 });

// Daily request counter. It lives in memory only, so it starts from zero on
// every process restart. `dayBucket` is the UTC calendar day (YYYY-MM-DD) the
// count belongs to.
const usage = {
  requestsToday: 0,
  dayBucket: new Date().toISOString().slice(0, 10),
};
|
|
|
|
/**
 * Count one request against the current day bucket.
 *
 * The bucket is the UTC calendar day (YYYY-MM-DD). If the day has changed
 * since the last recorded request, counting restarts, so this request
 * becomes the first of the new day.
 */
function noteUsage() {
  const currentDay = new Date().toISOString().slice(0, 10);
  const isSameDay = currentDay === usage.dayBucket;

  usage.dayBucket = currentDay;
  usage.requestsToday = isSameDay ? usage.requestsToday + 1 : 1;
}
|
|
|
|
/**
 * Report whether an OpenRouter API key is present in the environment.
 *
 * @returns {boolean} true when OPENROUTER_API_KEY is set to a non-empty value.
 */
function configured() {
  const { OPENROUTER_API_KEY: apiKey } = process.env;
  return Boolean(apiKey);
}
|
|
|
|
/**
 * Snapshot of today's request accounting.
 *
 * The stored counter is only rolled over when a request is recorded, so a
 * count whose day bucket is no longer the current UTC day belongs to a
 * previous day. Such a stale count is reported as zero instead of leaking
 * yesterday's numbers. This function is read-only and does not modify the
 * shared `usage` state.
 *
 * @returns {{ requestsToday: number, requestsRemaining: number }}
 *   Requests counted for the current UTC day, and how many are left before
 *   DAILY_CAP (never negative).
 */
function getUsage() {
  const today = new Date().toISOString().slice(0, 10);
  const requestsToday = usage.dayBucket === today ? usage.requestsToday : 0;

  return {
    requestsToday,
    requestsRemaining: Math.max(0, DAILY_CAP - requestsToday),
  };
}
|
|
|
|
/**
 * Reduce an error to a small, log-safe summary.
 *
 * Axios errors carry the request config (headers, body, URL), and the headers
 * include the API key. Only three fields are copied out here, so nothing else
 * from the error reaches whatever the summary is passed to.
 *
 * @param {*} err - The caught value; may be null/undefined.
 * @returns {{ code: *, status: *, message: string }}
 *   `code` and `status` are undefined when absent; `message` falls back to
 *   'unknown' when missing or empty.
 */
function scrubError(err) {
  const { code, response, message } = err ?? {};
  return {
    code,
    status: response?.status,
    message: message || 'unknown',
  };
}
|
|
|
|
/**
 * Build a copy of a transport-level error that is safe to propagate.
 *
 * A rejected axios call attaches the request config, including the
 * `Authorization: Bearer …` header, to the error it throws. This copy keeps
 * only `message`, `code` and `response.status`. The original is deliberately
 * NOT attached as `cause`, since that would carry the headers along.
 *
 * @param {*} err - Whatever the HTTP client rejected with.
 * @returns {Error} A plain Error with `code` and, when known, `response.status`.
 */
function toSafeTransportError(err) {
  const apiKey = process.env.OPENROUTER_API_KEY;
  let message = (typeof err?.message === 'string' && err.message) || 'unknown';
  if (apiKey) {
    // Belt and braces: never let the key survive inside a message string.
    message = message.split(apiKey).join('[redacted]');
  }

  const safe = new Error(message);
  safe.code = err?.code;
  const status = err?.response?.status;
  if (status !== undefined) {
    safe.response = { status };
  }
  return safe;
}

/**
 * Send one chat-completion request to OpenRouter for the given model.
 *
 * @param {string} model - Model id placed in the request body.
 * @param {string} systemMessage - Content of the `system` message.
 * @param {string} userPrompt - Content of the `user` message.
 * @returns {Promise<{ response: *, modelUsed: string, latencyMs: number }>}
 *   The first choice's message content, the model requested, and the
 *   wall-clock duration of the call in milliseconds.
 * @throws {Error} with `code`:
 *   - 'OPENROUTER_429'   when the API answers 429,
 *   - 'OPENROUTER_5XX'   when the API answers 5xx,
 *   - 'OPENROUTER_EMPTY' when the response has no message content,
 *   - or the HTTP client's own code (e.g. timeout, other 4xx), re-thrown as a
 *     stripped copy that carries no request headers.
 */
async function callModel(model, systemMessage, userPrompt) {
  const start = Date.now();
  const body = {
    model,
    messages: [
      { role: 'system', content: systemMessage },
      { role: 'user', content: userPrompt },
    ],
    temperature: 0.1,
    max_tokens: 500,
    // response_format works on OpenAI-compatible endpoints; harmless if a
    // model ignores it. We still validate the response ourselves.
    response_format: { type: 'json_object' },
  };

  let res;
  try {
    res = await axios.post(`${BASE_URL}/chat/completions`, body, {
      headers: {
        Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`,
        'Content-Type': 'application/json',
        // OpenRouter recommends setting referer + title for usage tracking.
        // The title default is generic; the referer default is the public
        // site URL. Both can be overridden through the environment, and
        // neither is placed in the prompt itself.
        'HTTP-Referer': process.env.OPENROUTER_REFERER || 'https://vyndr.app',
        'X-Title': process.env.OPENROUTER_TITLE || 'Sports Analytics',
      },
      timeout: HTTP_TIMEOUT_MS,
      // Let 429 and 5xx resolve so they can be mapped to our own error codes
      // below; every other non-2xx status still rejects.
      validateStatus: (s) => (s >= 200 && s < 300) || s === 429 || (s >= 500 && s < 600),
    });
  } catch (err) {
    // The rejection carries the request config (and so the Bearer header).
    // Never let that object escape this function.
    throw toSafeTransportError(err);
  }

  if (res.status === 429) {
    const err = new Error('openrouter rate limited');
    err.code = 'OPENROUTER_429';
    throw err;
  }
  if (res.status >= 500) {
    const err = new Error(`openrouter 5xx (${res.status})`);
    err.code = 'OPENROUTER_5XX';
    throw err;
  }

  const content = res.data?.choices?.[0]?.message?.content;
  if (!content) {
    const err = new Error('openrouter empty response');
    err.code = 'OPENROUTER_EMPTY';
    throw err;
  }
  return { response: content, modelUsed: model, latencyMs: Date.now() - start };
}
|
|
|
|
/**
 * Run one inference: try the primary model, then fall back once.
 *
 * Returns null (never throws for upstream failures) when:
 *   - no API key is configured,
 *   - either argument is not a string,
 *   - today's request cap has been reached,
 *   - the circuit breaker is open, or
 *   - both the primary and the fallback model fail.
 *
 * Only successful calls are counted against the daily cap.
 *
 * @param {string} systemMessage - System prompt.
 * @param {string} userPrompt - User prompt.
 * @returns {Promise<{ response: *, modelUsed: string, latencyMs: number } | null>}
 */
async function analyze(systemMessage, userPrompt) {
  if (!configured()) return null;
  if (typeof systemMessage !== 'string' || typeof userPrompt !== 'string') return null;

  // Roll the day bucket over BEFORE checking the cap. The counter is otherwise
  // only reset when a request is recorded, and no request can be recorded once
  // the cap is hit. Without this, one exhausted day would disable the adapter
  // until the process restarts.
  const today = new Date().toISOString().slice(0, 10);
  if (usage.dayBucket !== today) {
    usage.dayBucket = today;
    usage.requestsToday = 0;
  }

  if (usage.requestsToday >= DAILY_CAP) {
    console.warn(`[${SOURCE}] daily cap reached (${DAILY_CAP})`);
    return null;
  }

  await limiter.waitForToken();

  // Try primary; on failure, retry once with the fallback model.
  try {
    const result = await breaker.call(() => callModel(PRIMARY_MODEL, systemMessage, userPrompt));
    noteUsage();
    return result;
  } catch (primaryErr) {
    if (primaryErr?.code === 'CIRCUIT_OPEN') {
      // Don't burn the second model when the breaker says everything is down.
      return null;
    }
    // Log only the scrubbed summary, never the raw error object.
    console.warn(`[${SOURCE}] primary failed:`, scrubError(primaryErr));

    try {
      // Fallback bypasses the breaker — different model, different upstream.
      const result = await callModel(FALLBACK_MODEL, systemMessage, userPrompt);
      noteUsage();
      return result;
    } catch (fallbackErr) {
      console.warn(`[${SOURCE}] fallback also failed:`, scrubError(fallbackErr));
      return null;
    }
  }
}
|
|
|
|
// Module internals exposed alongside the public API under `__internals`
// (presumably so tests can reach them — confirm against callers).
const __internals = {
  limiter,
  breaker,
  callModel,
  scrubError,
  PRIMARY_MODEL,
  FALLBACK_MODEL,
  usage,
};

// Public surface: configured(), analyze(), getUsage().
module.exports = { configured, analyze, getUsage, __internals };
|