Sessions 5-7a: 955 tests, deployment ready
This commit is contained in:
@@ -0,0 +1,157 @@
|
||||
/**
|
||||
* OpenRouter — LLM inference adapter (Engine 2).
|
||||
*
|
||||
* Primary: DeepSeek V3 (deepseek/deepseek-chat) — best reasoning/dollar,
|
||||
* returns clean JSON when asked nicely.
|
||||
* Fallback: Nemotron (nvidia/llama-3.3-nemotron-super-49b-v1) — used when
|
||||
* primary 429s, 5xxs, or times out.
|
||||
*
|
||||
* SECURITY POSTURE:
|
||||
* - OPENROUTER_API_KEY is the most sensitive secret in this app. We
|
||||
* accept the key from env and pass it as a Bearer header — it never
|
||||
* appears in URLs, logs, or error messages we emit. Axios errors that
|
||||
* wrap the request are caught before re-throw to scrub headers.
|
||||
* - We do NOT include the string 'VYNDR' in prompts. OpenRouter is a
|
||||
* pass-through to third-party models and we don't want our brand
|
||||
* name in their training/QA pipelines.
|
||||
*
|
||||
* EXPORTS:
|
||||
* configured() → boolean
|
||||
* analyze(systemMessage, userPrompt) → { response, modelUsed, latencyMs }
|
||||
* or null on total failure
|
||||
* getUsage() → { requestsToday, requestsRemaining }
|
||||
*/
|
||||
|
||||
const axios = require('axios');
|
||||
const { createLimiter, createCircuitBreaker } = require('../../utils/rateLimiter');
|
||||
|
||||
// Tag used as the `[openrouter]` prefix on this adapter's log lines.
const SOURCE = 'openrouter';

// API root. A non-empty OPENROUTER_BASE_URL replaces the public endpoint.
const BASE_URL =
  process.env.OPENROUTER_BASE_URL || 'https://openrouter.ai/api/v1';

// Per-request HTTP timeout handed to axios: 30 seconds.
const HTTP_TIMEOUT_MS = 30 * 1000;

// Model ids. Each can be overridden through its env var; an unset or empty
// variable falls back to the default on the right.
const PRIMARY_MODEL =
  process.env.OPENROUTER_PRIMARY_MODEL || 'deepseek/deepseek-chat';
const FALLBACK_MODEL =
  process.env.OPENROUTER_FALLBACK_MODEL || 'nvidia/llama-3.3-nemotron-super-49b-v1';
|
||||
|
||||
// Daily request budget enforced by this adapter (1000/day).
const DAILY_CAP = 1000;

// In-memory accounting for the daily budget. It lives only as long as the
// process does, so a restart starts the count again from zero. `dayBucket`
// is the `YYYY-MM-DD` prefix of the current ISO (UTC) timestamp.
const usage = {
  requestsToday: 0,
  dayBucket: new Date().toISOString().substring(0, 10),
};

// Short-window throttle: configured for 20 tokens per 60 000 ms (20 req/min).
const limiter = createLimiter({ tokensPerInterval: 20, interval: 60_000 });

// Circuit breaker wrapped around the primary model call; configured with a
// failure threshold of 3 and a 60 000 ms reset timeout.
const breaker = createCircuitBreaker({ failureThreshold: 3, resetTimeout: 60_000 });
|
||||
|
||||
/**
 * Record one request against the in-memory daily counter.
 *
 * The counter is bucketed by the `YYYY-MM-DD` prefix of the current ISO
 * (UTC) timestamp. If that date differs from the stored bucket, the count
 * restarts, so the request being recorded becomes the first of the new day.
 */
function noteUsage() {
  const currentDay = new Date().toISOString().slice(0, 10);
  const sameDay = currentDay === usage.dayBucket;

  // Same day: increment. New day: this request is number one.
  usage.requestsToday = sameDay ? usage.requestsToday + 1 : 1;
  usage.dayBucket = currentDay;
}
|
||||
|
||||
/**
 * Report whether the adapter has an API key to work with.
 *
 * @returns {boolean} true when OPENROUTER_API_KEY is set to a non-empty
 *   value in the environment, false otherwise.
 */
function configured() {
  const { OPENROUTER_API_KEY: apiKey } = process.env;
  return Boolean(apiKey);
}
|
||||
|
||||
/**
 * Snapshot of the daily request budget.
 *
 * The stored counter is only meaningful for the day recorded in
 * `usage.dayBucket`. If the current ISO (UTC) date has moved past that
 * bucket, no request has been recorded today yet, so the snapshot reports
 * zero used instead of echoing the previous day's total. This function
 * never mutates `usage`.
 *
 * @returns {{ requestsToday: number, requestsRemaining: number }}
 *   `requestsRemaining` is clamped so it never goes below 0.
 */
function getUsage() {
  const today = new Date().toISOString().slice(0, 10);
  const requestsToday = usage.dayBucket === today ? usage.requestsToday : 0;
  return {
    requestsToday,
    requestsRemaining: Math.max(0, DAILY_CAP - requestsToday),
  };
}
|
||||
|
||||
/**
 * Reduce an error to the few fields that are safe to log.
 *
 * Axios errors carry the request config (headers included), the request
 * object and the response body; none of that is copied. Only `code`, the
 * HTTP status and the message survive. As a last line of defence, any
 * literal occurrence of the configured API key inside the message is
 * replaced with `[REDACTED]`.
 *
 * @param {unknown} err - Anything that was thrown; may be null/undefined.
 * @returns {{ code: *, status: (number|undefined), message: * }}
 *   `message` falls back to 'unknown' when the error has no (truthy) message.
 */
function scrubError(err) {
  const apiKey = process.env.OPENROUTER_API_KEY;
  let message = err?.message || 'unknown';

  // Guard on a non-empty key: replacing '' would mangle every message.
  if (apiKey && typeof message === 'string' && message.includes(apiKey)) {
    message = message.replaceAll(apiKey, '[REDACTED]');
  }

  return {
    code: err?.code,
    status: err?.response?.status,
    message,
  };
}
|
||||
|
||||
/**
 * Send one chat-completion request to a single model.
 *
 * @param {string} model - Model id placed in the request body.
 * @param {string} systemMessage - Content of the `system` message.
 * @param {string} userPrompt - Content of the `user` message.
 * @returns {Promise<{ response: *, modelUsed: string, latencyMs: number }>}
 *   `response` is the first choice's message content as returned by the
 *   API; `latencyMs` is wall-clock time measured from function entry.
 * @throws {Error} code 'OPENROUTER_429' on HTTP 429, 'OPENROUTER_5XX' on any
 *   5xx, 'OPENROUTER_EMPTY' when the first choice has no content. Transport
 *   failures and other non-2xx statuses are rethrown as a plain Error that
 *   carries only `message`, `code` and `response.status`.
 */
async function callModel(model, systemMessage, userPrompt) {
  const start = Date.now();
  const body = {
    model,
    messages: [
      { role: 'system', content: systemMessage },
      { role: 'user', content: userPrompt },
    ],
    temperature: 0.1,
    max_tokens: 500,
    // response_format works on OpenAI-compatible endpoints; harmless if a
    // model ignores it. Callers still have to validate the content.
    response_format: { type: 'json_object' },
  };

  let res;
  try {
    res = await axios.post(`${BASE_URL}/chat/completions`, body, {
      headers: {
        Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`,
        'Content-Type': 'application/json',
        // Referer + title are sent for usage tracking on the provider side.
        // Note that the default referer is the app's own domain; set
        // OPENROUTER_REFERER / OPENROUTER_TITLE to send something else.
        'HTTP-Referer': process.env.OPENROUTER_REFERER || 'https://vyndr.app',
        'X-Title': process.env.OPENROUTER_TITLE || 'Sports Analytics',
      },
      timeout: HTTP_TIMEOUT_MS,
      // Let 429 and 5xx resolve so they get the dedicated error codes below;
      // every other non-2xx status rejects and is handled in the catch.
      validateStatus: (s) => (s >= 200 && s < 300) || s === 429 || (s >= 500 && s < 600),
    });
  } catch (rawErr) {
    // The rejected error may reference the request config, including the
    // Authorization header. Rethrow a minimal error instead. No `cause` is
    // attached on purpose, because that would keep the raw error reachable.
    const err = new Error(rawErr?.message || 'openrouter request failed');
    if (rawErr?.code !== undefined) err.code = rawErr.code;
    if (rawErr?.response?.status !== undefined) {
      err.response = { status: rawErr.response.status };
    }
    throw err;
  }

  if (res.status === 429) {
    const err = new Error('openrouter rate limited');
    err.code = 'OPENROUTER_429';
    throw err;
  }
  if (res.status >= 500) {
    const err = new Error(`openrouter 5xx (${res.status})`);
    err.code = 'OPENROUTER_5XX';
    throw err;
  }

  const content = res.data?.choices?.[0]?.message?.content;
  if (!content) {
    const err = new Error('openrouter empty response');
    err.code = 'OPENROUTER_EMPTY';
    throw err;
  }
  return { response: content, modelUsed: model, latencyMs: Date.now() - start };
}
|
||||
|
||||
/**
 * Run one inference: primary model first, then a single retry on the
 * fallback model if the primary attempt fails.
 *
 * @param {string} systemMessage - System message for the model.
 * @param {string} userPrompt - User prompt for the model.
 * @returns {Promise<{ response: *, modelUsed: string, latencyMs: number } | null>}
 *   The result from `callModel`, or null when the adapter is not configured,
 *   an argument is not a string, the daily cap is exhausted, the breaker
 *   reports CIRCUIT_OPEN, or both models fail.
 */
async function analyze(systemMessage, userPrompt) {
  if (!configured()) return null;
  if (typeof systemMessage !== 'string' || typeof userPrompt !== 'string') return null;

  // Roll the day bucket over BEFORE checking the cap. The counter is
  // otherwise only reset by noteUsage(), which runs after a successful
  // request. Once the cap was hit, this guard would keep returning null
  // and the reset could never happen again.
  const today = new Date().toISOString().slice(0, 10);
  if (usage.dayBucket !== today) {
    usage.dayBucket = today;
    usage.requestsToday = 0;
  }

  if (usage.requestsToday >= DAILY_CAP) {
    console.warn(`[${SOURCE}] daily cap reached (${DAILY_CAP})`);
    return null;
  }
  await limiter.waitForToken();

  // Primary attempt, guarded by the circuit breaker.
  try {
    const result = await breaker.call(() => callModel(PRIMARY_MODEL, systemMessage, userPrompt));
    noteUsage();
    return result;
  } catch (primaryErr) {
    if (primaryErr?.code === 'CIRCUIT_OPEN') {
      // Don't burn the second model when the breaker says everything is down.
      return null;
    }
    console.warn(`[${SOURCE}] primary failed:`, scrubError(primaryErr));
  }

  // Single retry on the fallback model. It bypasses the breaker on purpose:
  // different model, different upstream.
  try {
    const result = await callModel(FALLBACK_MODEL, systemMessage, userPrompt);
    noteUsage();
    return result;
  } catch (fallbackErr) {
    console.warn(`[${SOURCE}] fallback also failed:`, scrubError(fallbackErr));
    return null;
  }
}
|
||||
|
||||
module.exports = {
|
||||
configured,
|
||||
analyze,
|
||||
getUsage,
|
||||
__internals: { limiter, breaker, callModel, scrubError, PRIMARY_MODEL, FALLBACK_MODEL, usage },
|
||||
};
|
||||
Reference in New Issue
Block a user