Self-contained Dockerized build for end users. Run via docker compose; see README.md for setup. Source-only, no sample data or build artifacts. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
424 lines
17 KiB
JavaScript
424 lines
17 KiB
JavaScript
// lib/providers.js — one interface, five backends.
|
|
// All calls are made server-side (no CORS issues, keys never touch the browser).
|
|
import { resolveGeneration } from "./model-caps";
|
|
|
|
// Anthropic API version header; spread into the headers of the Anthropic
// requests made below (chat and model listing).
const VERSION_HEADER = { "anthropic-version": "2023-06-01" };
|
|
|
|
/**
 * Normalize a user-supplied base URL so path segments can be appended to it.
 *
 * Surrounding whitespace and trailing slashes are removed. When `url` is
 * missing, blank, not a string, or nothing but slashes, the fallback is used
 * instead and receives the same normalization (previously a blank `url`
 * returned the fallback untouched, and a slash-only `url` collapsed to "").
 *
 * @param {string} [url] - Base URL from settings; may be empty.
 * @param {string} [fallback] - Default base URL for the provider.
 * @returns {string} The normalized base URL; if neither argument yields a
 *   usable value, `fallback` is returned as given.
 */
function cleanBase(url, fallback) {
  const tidy = (value) => (typeof value === "string" ? value.trim().replace(/\/+$/, "") : "");
  return tidy(url) || tidy(fallback) || fallback;
}
|
|
|
|
/**
 * Extract a short, human-readable error description from a failed response.
 *
 * The body is read exactly once as text and then parsed as JSON. Reading it
 * with `res.json()` first would consume the stream, leaving nothing for a
 * plain-text fallback when the body is not JSON (HTML error pages, proxy
 * messages, bare strings).
 *
 * @param {Response} res - A fetch Response whose body has not been read yet.
 * @returns {Promise<string>} `error.message`, `error`, or `message` from a
 *   JSON body; otherwise up to 200 characters of the raw or serialized body;
 *   "" when the body cannot be read at all.
 */
async function readError(res) {
  let raw;
  try {
    raw = await res.text();
  } catch {
    // Deliberately best-effort: the caller still reports the HTTP status.
    return "";
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return raw.slice(0, 200);
  }

  const detail = parsed?.error?.message || parsed?.error || parsed?.message;
  if (typeof detail === "string" && detail) return detail;
  // Non-string details (e.g. an `error` object without `message`) are
  // serialized so they never end up as "[object Object]" in a message.
  return JSON.stringify(detail || parsed).slice(0, 200);
}
|
|
|
|
/**
 * Translate a low-level fetch/network failure into an actionable Error.
 *
 * @param {string} provider - Provider id ("ollama", "lmstudio", "openai", ...).
 * @param {string} base - Base URL or host shown in the message for local providers.
 * @param {unknown} err - The error thrown by fetch or while reading the stream.
 * @returns {Error} A new Error (with the original attached as `cause`) for
 *   recognized timeout/connectivity failures; otherwise `err` itself.
 */
function friendlyConnError(provider, base, err) {
  // A caller-initiated cancellation is not a connectivity problem; hand it
  // back untouched so it is not reported as "could not reach ...".
  if (err?.name === "AbortError") return err;

  const msg = String(err?.message || err);
  const cause = String(err?.cause?.code || err?.cause?.message || "");
  const haystack = msg + " " + cause;

  // Node's fetch aborts requests that go quiet too long; without this check a
  // slow (but healthy) local model gets misreported as "not running".
  if (/timeout/i.test(haystack)) {
    return new Error(
      `${providerLabel(provider)} took too long to respond and the connection timed out. The model may still be working — for local models, try a smaller/faster model or wait and retry.`,
      { cause: err }
    );
  }

  if (/fetch failed|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|aborted|network|terminated|socket/i.test(haystack)) {
    if (provider === "ollama") {
      return new Error(
        `Could not reach Ollama at ${base}. Make sure Ollama is running (open the Ollama app, or run \`ollama serve\`).`,
        { cause: err }
      );
    }
    if (provider === "lmstudio") {
      return new Error(
        `Could not reach LM Studio at ${base}. In LM Studio, open the Developer tab and start the local server.`,
        { cause: err }
      );
    }
    return new Error(`Could not reach the ${provider} API. Check your internet connection. (${msg})`, { cause: err });
  }

  // Not a recognized connectivity failure: leave the error as it is.
  return err;
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// chat(settings, opts) -> string
|
|
// opts: { system, user, temperature, maxTokens, expectJson }
|
|
// ----------------------------------------------------------------------------
|
|
/**
 * Send one system+user exchange to the configured provider and return the
 * model's full text reply.
 *
 * @param {object} settings - App settings; reads `provider` and `providers[provider]`.
 * @param {object} opts - `{ system, user, temperature, maxTokens, expectJson, signal }`.
 * @returns {Promise<string>} The accumulated reply text.
 * @throws {Error} When no model is selected, a required API key is missing,
 *   the provider is unknown, or the backend call fails.
 */
export async function chat(settings, opts) {
  const { provider } = settings;
  const cfg = settings.providers?.[provider] || {};
  const { system = "", user = "", expectJson = false, signal } = opts;

  if (!cfg.model) {
    throw new Error(`No model selected for ${providerLabel(provider)}. Open Settings, pick a model, and save.`);
  }

  // Explicit per-call values win; otherwise use the per-model defaults, and
  // never ask for more output than the model reports it can produce.
  const resolved = await resolveGeneration(settings);
  const temperature = opts.temperature ?? resolved.temperature;
  const requestedTokens = opts.maxTokens ?? resolved.maxTokens;
  const outputCap = resolved.caps.maxOutputTokens;
  const maxTokens = outputCap ? Math.min(requestedTokens, outputCap) : requestedTokens;

  const request = { system, user, temperature, maxTokens, expectJson, signal };

  if (provider === "ollama") {
    // Only size num_ctx when we actually know the model's window.
    const contextTokens = resolved.caps.source === "live" ? resolved.caps.contextTokens : 0;
    return ollamaChat(cfg, { ...request, contextTokens });
  }

  if (provider === "lmstudio") {
    const base = cleanBase(cfg.baseUrl, "http://localhost:1234") + "/v1";
    return openaiCompatChat("lmstudio", base, null, cfg.model, request);
  }

  if (provider === "openai") {
    requireKey(cfg, "OpenAI");
    return openaiCompatChat("openai", "https://api.openai.com/v1", cfg.apiKey, cfg.model, request);
  }

  if (provider === "anthropic") {
    requireKey(cfg, "Anthropic");
    // The Anthropic backend takes no expectJson option.
    return anthropicChat(cfg, { system, user, temperature, maxTokens, signal });
  }

  if (provider === "google") {
    requireKey(cfg, "Google");
    return googleChat(cfg, request);
  }

  throw new Error(`Unknown provider: ${provider}`);
}
|
|
|
|
/**
 * Guard for providers that need an API key.
 *
 * @param {object} cfg - Provider config; only `apiKey` is read.
 * @param {string} name - Provider display name used in the error message.
 * @throws {Error} When `cfg.apiKey` is missing or empty.
 */
function requireKey(cfg, name) {
  if (cfg.apiKey) return;
  throw new Error(`No API key set for ${name}. Add it on the Settings page.`);
}
|
|
|
|
// ---------- streaming helpers ----------
|
|
// All backends stream and accumulate server-side. Non-streaming requests sit
|
|
// silent until the full response is ready, and Node's fetch kills any request
|
|
// whose headers take >5 minutes — which long local generations routinely do.
|
|
// Streaming returns headers instantly and each chunk resets the idle timer.
|
|
/**
 * Yield the non-empty, trimmed lines of a streamed response body as they
 * arrive. A line split across chunks is held back until its newline shows up;
 * whatever remains when the stream ends is yielded as the last line.
 *
 * @param {{ body: AsyncIterable<Uint8Array> }} res - Response-like object with an async-iterable body.
 * @yields {string} One trimmed, non-empty line at a time.
 */
async function* streamLines(res) {
  const utf8 = new TextDecoder();
  let pending = "";

  for await (const bytes of res.body) {
    // `stream: true` keeps multi-byte characters that straddle chunks intact.
    const pieces = (pending + utf8.decode(bytes, { stream: true })).split("\n");
    // The final piece has no newline yet, so it stays buffered.
    pending = pieces.pop();
    for (const piece of pieces) {
      const text = piece.trim();
      if (text) yield text;
    }
  }

  // Flush the decoder and emit any unterminated trailing line.
  const tail = (pending + utf8.decode()).trim();
  if (tail) yield tail;
}
|
|
|
|
// Parse server-sent events, invoking onEvent for each JSON data payload.
|
|
/**
 * Consume a server-sent-events response and hand each JSON `data:` payload to
 * `onEvent`. Lines that are not `data:` fields, empty payloads, the `[DONE]`
 * sentinel, and payloads that are not valid JSON are skipped. Exceptions
 * thrown by `onEvent` propagate to the caller.
 *
 * @param {Response} res - Streaming response to read.
 * @param {(event: any) => void} onEvent - Called once per parsed payload.
 * @returns {Promise<void>}
 */
async function readSse(res, onEvent) {
  const FIELD = "data:";

  // Returns a one-element array holding the parsed value, or null when the
  // text is not JSON. Parsing sits outside the onEvent call so handler
  // errors are never mistaken for parse errors.
  const parse = (text) => {
    try {
      return [JSON.parse(text)];
    } catch {
      return null;
    }
  };

  for await (const line of streamLines(res)) {
    if (line.startsWith(FIELD)) {
      const data = line.slice(FIELD.length).trim();
      if (data !== "" && data !== "[DONE]") {
        const parsed = parse(data);
        if (parsed) onEvent(parsed[0]);
      }
    }
  }
}
|
|
|
|
/**
 * Human-readable name for a provider id.
 *
 * Only the table's own keys are consulted, so ids that collide with inherited
 * object properties ("toString", "constructor", ...) are returned unchanged
 * rather than resolving to a function from the prototype chain.
 *
 * @param {string} p - Provider id.
 * @returns {string} The display label, or `p` itself when the id is unknown.
 */
export function providerLabel(p) {
  const labels = {
    ollama: "Ollama",
    lmstudio: "LM Studio",
    openai: "OpenAI",
    anthropic: "Anthropic",
    google: "Google AI",
  };
  return Object.hasOwn(labels, p) ? labels[p] : p;
}
|
|
|
|
// ---------- Ollama ----------
|
|
/**
 * Chat against a local Ollama server via POST /api/chat, streaming the reply
 * and returning the accumulated text.
 *
 * @param {object} cfg - Provider config; reads `baseUrl` and `model`.
 * @param {object} opts - `{ system, user, temperature, maxTokens, expectJson, signal, contextTokens }`.
 *   `contextTokens` is the model's context window, or 0 to leave num_ctx unset.
 * @returns {Promise<string>} The concatenated message content.
 * @throws {Error} On connection failure, non-2xx status, or an error object in the stream.
 */
async function ollamaChat(cfg, { system, user, temperature, maxTokens, expectJson, signal, contextTokens }) {
  const base = cleanBase(cfg.baseUrl, "http://localhost:11434");

  const options = { temperature, num_predict: maxTokens };
  // Ollama defaults num_ctx to ~4k and silently truncates longer prompts, so
  // size the window to this request, bounded by the model's real maximum.
  // chars/3 over-estimates tokens on purpose; the margin covers chat template.
  if (contextTokens) {
    const estimate = Math.ceil((system.length + user.length) / 3) + maxTokens + 512;
    const roundedToKiB = Math.ceil(estimate / 1024) * 1024;
    options.num_ctx = Math.min(contextTokens, Math.max(4096, roundedToKiB));
  }

  const messages = [];
  if (system) messages.push({ role: "system", content: system });
  messages.push({ role: "user", content: user });

  const payload = { model: cfg.model, stream: true, messages, options };
  if (expectJson) payload.format = "json";

  let res;
  try {
    res = await fetch(base + "/api/chat", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      signal,
      body: JSON.stringify(payload),
    });
  } catch (e) {
    throw friendlyConnError("ollama", base, e);
  }
  if (!res.ok) {
    const detail = await readError(res);
    throw new Error(`Ollama error (${res.status}): ${detail}`);
  }

  // NDJSON stream: one JSON object per line; unparseable lines are skipped.
  let text = "";
  try {
    for await (const line of streamLines(res)) {
      let event;
      try {
        event = JSON.parse(line);
      } catch {
        continue;
      }
      if (event?.error) throw new Error(`Ollama error: ${event.error}`);
      const piece = event?.message?.content;
      if (piece) text += piece;
    }
  } catch (e) {
    // Errors reported by Ollama itself pass through; anything else is
    // treated as a transport problem and translated.
    const reportedByOllama = /^Ollama error:/.test(String(e?.message));
    throw reportedByOllama ? e : friendlyConnError("ollama", base, e);
  }
  return text;
}
|
|
|
|
// ---------- OpenAI-compatible (OpenAI + LM Studio) ----------
|
|
/**
 * Chat against an OpenAI-compatible /chat/completions endpoint (OpenAI itself
 * or LM Studio), streaming the reply and returning the accumulated text.
 *
 * If the first request is rejected with a message that mentions
 * response_format, temperature, unsupported parameters, or streaming, the
 * request is retried once with those features removed.
 *
 * @param {string} provider - "openai" or "lmstudio"; selects labels and JSON mode.
 * @param {string} base - API base URL ending in /v1.
 * @param {string|null} apiKey - Bearer token, or null for keyless local servers.
 * @param {string} model - Model id.
 * @param {object} opts - `{ system, user, temperature, maxTokens, expectJson, signal }`.
 * @returns {Promise<string>} The reply text ("" if the model produced none).
 * @throws {Error} On connection failure, non-2xx status after the retry, or
 *   an error event delivered inside the stream.
 */
async function openaiCompatChat(provider, base, apiKey, model, { system, user, temperature, maxTokens, expectJson, signal }) {
  const label = providerLabel(provider);
  const headers = { "Content-Type": "application/json" };
  if (apiKey) headers["Authorization"] = "Bearer " + apiKey;

  const body = {
    model,
    stream: true,
    temperature,
    max_tokens: maxTokens,
    messages: [
      ...(system ? [{ role: "system", content: system }] : []),
      { role: "user", content: user },
    ],
  };
  // JSON mode is only requested from OpenAI proper.
  if (expectJson && provider === "openai") body.response_format = { type: "json_object" };

  // `body` is serialized at call time, so the retry below picks up its edits.
  const send = async () => {
    try {
      return await fetch(base + "/chat/completions", { method: "POST", headers, signal, body: JSON.stringify(body) });
    } catch (e) {
      throw friendlyConnError(provider, base, e);
    }
  };

  let res = await send();

  // Some models reject response_format, temperature, or streaming; retry once without.
  if (!res.ok) {
    const detail = await readError(res);
    let changed = false;
    if (/response_format|temperature|unsupported|param/i.test(detail) && (body.response_format || body.temperature !== undefined)) {
      delete body.response_format;
      delete body.temperature;
      // The retry also switches to max_completion_tokens in place of max_tokens.
      body.max_completion_tokens = body.max_tokens;
      delete body.max_tokens;
      changed = true;
    }
    if (/stream/i.test(detail)) {
      body.stream = false;
      changed = true;
    }
    if (!changed) throw new Error(`${label} error (${res.status}): ${detail}`);
    res = await send();
    if (!res.ok) throw new Error(`${label} error (${res.status}): ${await readError(res)}`);
  }

  if (!body.stream) {
    // Non-streaming fallback: a dropped connection while reading the body
    // gets the same friendly translation as the streaming path.
    let data;
    try {
      data = await res.json();
    } catch (e) {
      throw friendlyConnError(provider, base, e);
    }
    return data?.choices?.[0]?.message?.content ?? "";
  }

  let out = "";
  let streamError = null;
  try {
    await readSse(res, (obj) => {
      // A failure after the 200 response arrives as an `error` event inside
      // the stream; surface it instead of returning partial or empty text,
      // matching the Anthropic and Google backends.
      if (obj?.error) {
        const detail = typeof obj.error === "string" ? obj.error : obj.error?.message || JSON.stringify(obj.error);
        streamError = new Error(`${label} error: ${detail}`);
        throw streamError;
      }
      const delta = obj?.choices?.[0]?.delta;
      if (delta?.content) out += delta.content;
    });
  } catch (e) {
    if (streamError && e === streamError) throw e;
    throw friendlyConnError(provider, base, e);
  }
  return out;
}
|
|
|
|
// ---------- Anthropic ----------
|
|
/**
 * Chat against the Anthropic Messages API, streaming the reply and returning
 * the accumulated text.
 *
 * @param {object} cfg - Provider config; reads `model` and `apiKey`.
 * @param {object} opts - `{ system, user, temperature, maxTokens, signal }`.
 * @returns {Promise<string>} The concatenated text deltas.
 * @throws {Error} On connection failure, non-2xx status, or an error event in the stream.
 */
async function anthropicChat(cfg, { system, user, temperature, maxTokens, signal }) {
  const HOST = "api.anthropic.com";

  const payload = { model: cfg.model, max_tokens: maxTokens, temperature, stream: true };
  if (system) payload.system = system;
  payload.messages = [{ role: "user", content: user }];

  // The payload is serialized on each call so the retry sees its edits.
  const post = async () => {
    try {
      return await fetch("https://api.anthropic.com/v1/messages", {
        method: "POST",
        headers: { "Content-Type": "application/json", "x-api-key": cfg.apiKey, ...VERSION_HEADER },
        signal,
        body: JSON.stringify(payload),
      });
    } catch (e) {
      throw friendlyConnError("anthropic", HOST, e);
    }
  };

  let res = await post();

  // Newer Anthropic models (Opus 4.7+, Fable) reject sampling parameters;
  // retry once without temperature.
  const samplingMayBeRejected = !res.ok && res.status === 400 && payload.temperature !== undefined;
  if (samplingMayBeRejected) {
    const detail = await readError(res);
    if (!/temperature|top_p|top_k|sampling/i.test(detail)) {
      throw new Error(`Anthropic error (400): ${detail}`);
    }
    delete payload.temperature;
    res = await post();
  }
  if (!res.ok) {
    const detail = await readError(res);
    throw new Error(`Anthropic error (${res.status}): ${detail}`);
  }

  let text = "";
  try {
    await readSse(res, (event) => {
      const isTextDelta = event?.type === "content_block_delta" && event.delta?.type === "text_delta";
      if (isTextDelta) text += event.delta.text;
      if (event?.type === "error") {
        throw new Error(`Anthropic error: ${event.error?.message || JSON.stringify(event.error)}`);
      }
    });
  } catch (e) {
    // Errors reported by the API pass through; the rest are transport failures.
    const reportedByApi = /^Anthropic error:/.test(String(e?.message));
    throw reportedByApi ? e : friendlyConnError("anthropic", HOST, e);
  }
  return text;
}
|
|
|
|
// ---------- Google ----------
|
|
/**
 * Chat against the Google Generative Language API (streamGenerateContent over
 * SSE), returning the accumulated text of the first candidate.
 *
 * @param {object} cfg - Provider config; reads `model` and `apiKey`.
 * @param {object} opts - `{ system, user, temperature, maxTokens, expectJson, signal }`.
 * @returns {Promise<string>} The concatenated part texts.
 * @throws {Error} On connection failure, non-2xx status, or an error object in the stream.
 */
async function googleChat(cfg, { system, user, temperature, maxTokens, expectJson, signal }) {
  const HOST = "generativelanguage.googleapis.com";
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(cfg.model)}:streamGenerateContent?alt=sse&key=${encodeURIComponent(cfg.apiKey)}`;

  const generationConfig = { temperature, maxOutputTokens: maxTokens };
  if (expectJson) generationConfig.responseMimeType = "application/json";

  const payload = {};
  if (system) payload.systemInstruction = { parts: [{ text: system }] };
  payload.contents = [{ role: "user", parts: [{ text: user }] }];
  payload.generationConfig = generationConfig;

  let res;
  try {
    res = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      signal,
      body: JSON.stringify(payload),
    });
  } catch (e) {
    throw friendlyConnError("google", HOST, e);
  }
  if (!res.ok) {
    const detail = await readError(res);
    throw new Error(`Google AI error (${res.status}): ${detail}`);
  }

  let text = "";
  try {
    await readSse(res, (event) => {
      if (event?.error) {
        throw new Error(`Google AI error: ${event.error?.message || JSON.stringify(event.error)}`);
      }
      // Parts without a text field contribute nothing.
      const parts = event?.candidates?.[0]?.content?.parts || [];
      for (const part of parts) text += part.text || "";
    });
  } catch (e) {
    // Errors reported by the API pass through; the rest are transport failures.
    const reportedByApi = /^Google AI error:/.test(String(e?.message));
    throw reportedByApi ? e : friendlyConnError("google", HOST, e);
  }
  return text;
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// listModels(settings, provider) -> string[]
|
|
// ----------------------------------------------------------------------------
|
|
/**
 * List the model ids available from a provider.
 *
 * Every request shares one 15-second deadline. A timeout at any stage (while
 * connecting or while reading the body) is reported as "Timed out reaching
 * <provider>." Previously a connect-phase timeout was rewritten by the
 * chat-oriented connection-error mapping and never reached that branch.
 *
 * @param {object} settings - App settings; reads `providers[provider]`.
 * @param {string} provider - "ollama" | "lmstudio" | "openai" | "anthropic" | "google".
 * @returns {Promise<string[]>} Sorted model ids.
 * @throws {Error} On missing API key, connection failure, timeout, non-2xx
 *   status, an empty local model list, or an unknown provider.
 */
export async function listModels(settings, provider) {
  const cfg = settings.providers?.[provider] || {};
  const t = AbortSignal.timeout(15000);

  // GET `url` under the shared deadline and return the parsed JSON body.
  // `where` is the base URL or host used in connection-error messages.
  const getJson = async (url, where, headers) => {
    let res;
    try {
      res = await fetch(url, headers ? { headers, signal: t } : { signal: t });
    } catch (e) {
      // Let the deadline reach the outer handler untouched.
      if (e?.name === "TimeoutError") throw e;
      throw friendlyConnError(provider, where, e);
    }
    if (!res.ok) throw new Error(`${providerLabel(provider)} error (${res.status}): ${await readError(res)}`);
    return res.json();
  };

  try {
    if (provider === "ollama") {
      const base = cleanBase(cfg.baseUrl, "http://localhost:11434");
      const data = await getJson(base + "/api/tags", base);
      const models = (data?.models || []).map((m) => m.name).sort();
      if (!models.length) throw new Error("Ollama is running but has no models installed. Run e.g. `ollama pull llama3.1:8b` first.");
      return models;
    }

    if (provider === "lmstudio") {
      const base = cleanBase(cfg.baseUrl, "http://localhost:1234");
      const data = await getJson(base + "/v1/models", base);
      const models = (data?.data || []).map((m) => m.id).sort();
      if (!models.length) throw new Error("LM Studio server is running but no model is loaded. Load a model in LM Studio first.");
      return models;
    }

    if (provider === "openai") {
      requireKey(cfg, "OpenAI");
      const data = await getJson("https://api.openai.com/v1/models", "api.openai.com", {
        Authorization: "Bearer " + cfg.apiKey,
      });
      // Keep ids with a chat-family prefix and drop audio/image/embedding/etc. variants.
      return (data?.data || [])
        .map((m) => m.id)
        .filter((id) => /^(gpt-|o\d|chatgpt-)/.test(id) && !/audio|realtime|tts|whisper|image|embed|moderation|transcribe|search/.test(id))
        .sort();
    }

    if (provider === "anthropic") {
      requireKey(cfg, "Anthropic");
      const data = await getJson("https://api.anthropic.com/v1/models?limit=100", "api.anthropic.com", {
        "x-api-key": cfg.apiKey,
        ...VERSION_HEADER,
      });
      return (data?.data || []).map((m) => m.id).sort();
    }

    if (provider === "google") {
      requireKey(cfg, "Google");
      const data = await getJson(
        `https://generativelanguage.googleapis.com/v1beta/models?pageSize=100&key=${encodeURIComponent(cfg.apiKey)}`,
        "generativelanguage.googleapis.com"
      );
      // Only models that support generateContent; strip the "models/" prefix.
      return (data?.models || [])
        .filter((m) => (m.supportedGenerationMethods || []).includes("generateContent"))
        .map((m) => String(m.name || "").replace(/^models\//, ""))
        .sort();
    }

    throw new Error("Unknown provider: " + provider);
  } catch (e) {
    if (e?.name === "TimeoutError") {
      throw new Error(`Timed out reaching ${providerLabel(provider)}.`, { cause: e });
    }
    throw e;
  }
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// testConnection(settings, provider) -> { ok, message }
|
|
// ----------------------------------------------------------------------------
|
|
/**
 * Verify that a provider is reachable and its model responds, by sending a
 * tiny prompt through `chat`.
 *
 * @param {object} settings - App settings; `provider` is overridden for the probe.
 * @param {string} provider - Provider id to test.
 * @returns {Promise<{ ok: true, message: string }>} Success summary with the
 *   first 40 characters of the reply.
 * @throws {Error} Whatever `chat` throws, or when the reply is empty.
 */
export async function testConnection(settings, provider) {
  const probeSettings = { ...settings, provider };
  const probeRequest = {
    system: "You are a connection test. Reply with exactly: OK",
    user: "Reply with exactly: OK",
    temperature: 0,
    // Reasoning models (o-series, gpt-5, gemini-2.5, Claude thinking) spend tokens
    // on hidden reasoning before any visible text, so keep a generous budget here
    // or the reply comes back empty even though the connection is fine.
    maxTokens: 2048,
  };

  const reply = await chat(probeSettings, probeRequest);
  const text = reply ? reply.trim() : "";
  if (text) {
    return { ok: true, message: `Connected. Model replied: "${text.slice(0, 40)}"` };
  }

  throw new Error(
    "The connection worked but the model returned no text. If this is a reasoning model, it may have used its whole token budget on internal reasoning — try a non-reasoning model, or this is usually safe to ignore."
  );
}
|