bizzle 5a51a0f112 Mr. Drew's Assignment Creator — Docker share build
Self-contained Dockerized build for end users. Run via docker compose;
see README.md for setup. Source-only, no sample data or build artifacts.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-21 19:58:36 -04:00

424 lines
17 KiB
JavaScript

// lib/providers.js — one interface, five backends.
// All calls are made server-side (no CORS issues, keys never touch the browser).
import { resolveGeneration } from "./model-caps";
// API version header spread into the headers of every Anthropic request made
// in this file (chat completions and model listing).
const VERSION_HEADER = { "anthropic-version": "2023-06-01" };
// Normalize a base URL for a local/remote server.
//   url      - user-configured base URL (may be empty, whitespace, or missing)
//   fallback - default base URL used when `url` has no usable content
// Returns the chosen URL trimmed and without trailing slashes, so callers can
// append "/path" safely. When neither value has content, `fallback` is
// returned untouched.
function cleanBase(url, fallback) {
  // Non-strings and whitespace-only values count as "not configured".
  const tidy = (value) => (typeof value === "string" ? value.trim() : "");
  const base = tidy(url) || tidy(fallback);
  if (!base) return fallback;
  // The fallback goes through the same normalization as a configured URL.
  return base.replace(/\/+$/, "");
}
// Extract a short, human-readable error detail from a failed HTTP response.
//   res - a fetch Response (or anything exposing an async text() method)
// Returns a string: the API's error message when the body is JSON, otherwise
// the first 200 characters of the raw body, or "" if the body can't be read.
// Never throws.
async function readError(res) {
  // A response body can only be consumed once, so read it as text a single
  // time and parse that; calling json() first would leave nothing for a
  // plain-text fallback.
  let raw = "";
  try {
    raw = await res.text();
  } catch {
    return "";
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON (HTML gateway page, plain-text message, empty body).
    return raw.slice(0, 200);
  }

  const picked = parsed?.error?.message || parsed?.error || parsed?.message;
  if (typeof picked === "string") return picked;
  // No message field, or a structured error without one: show compact JSON
  // rather than letting an object render as "[object Object]".
  return JSON.stringify(picked ?? parsed).slice(0, 200);
}
// Translate a low-level fetch/stream failure into a message an end user can
// act on.
//   provider - provider id ("ollama", "lmstudio", or a cloud provider id)
//   base     - base URL / host shown in the message for local servers
//   err      - the error thrown by fetch or while reading the stream
// Returns an Error (it does not throw): a friendly Error whose `cause` is the
// original `err` for timeouts and connection failures, or `err` itself when
// it is a cancellation or not recognizably a connection problem.
function friendlyConnError(provider, base, err) {
  // A request cancelled through the caller's AbortSignal is not a
  // connectivity problem; hand it back untouched so callers can recognize it
  // instead of telling the user the server is not running.
  if (err?.name === "AbortError") return err;

  const msg = String(err?.message || err);
  const cause = String(err?.cause?.code || err?.cause?.message || "");
  const haystack = msg + " " + cause;

  // Node's fetch aborts requests that go quiet too long; without this check a
  // slow (but healthy) local model gets misreported as "not running".
  if (/timeout/i.test(haystack)) {
    return new Error(
      `${providerLabel(provider)} took too long to respond and the connection timed out. The model may still be working — for local models, try a smaller/faster model or wait and retry.`,
      { cause: err }
    );
  }

  if (/fetch failed|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|aborted|network|terminated|socket/i.test(haystack)) {
    if (provider === "ollama") {
      return new Error(
        `Could not reach Ollama at ${base}. Make sure Ollama is running (open the Ollama app, or run \`ollama serve\`).`,
        { cause: err }
      );
    }
    if (provider === "lmstudio") {
      return new Error(
        `Could not reach LM Studio at ${base}. In LM Studio, open the Developer tab and start the local server.`,
        { cause: err }
      );
    }
    return new Error(`Could not reach the ${provider} API. Check your internet connection. (${msg})`, { cause: err });
  }

  return err;
}
// ----------------------------------------------------------------------------
// chat(settings, opts) -> string
// opts: { system, user, temperature, maxTokens, expectJson, signal }
// ----------------------------------------------------------------------------
// Send one system+user exchange to the configured provider and resolve to the
// model's reply text.
//   settings - { provider, providers: { [provider]: cfg } }
//   opts     - { system, user, temperature, maxTokens, expectJson, signal }
// Throws when no model is selected, when a required API key is missing, or
// when the provider id is not one of the five supported backends.
export async function chat(settings, opts) {
  const { provider } = settings;
  const cfg = settings.providers?.[provider] || {};
  const { system = "", user = "", expectJson = false, signal } = opts;

  if (!cfg.model) {
    throw new Error(`No model selected for ${providerLabel(provider)}. Open Settings, pick a model, and save.`);
  }

  // Explicit opts win; otherwise use the per-model values from
  // resolveGeneration, and never exceed the model's output cap when one is known.
  const resolved = await resolveGeneration(settings);
  const temperature = opts.temperature ?? resolved.temperature;
  const requested = opts.maxTokens ?? resolved.maxTokens;
  const ceiling = resolved.caps.maxOutputTokens;
  const maxTokens = ceiling ? Math.min(requested, ceiling) : requested;

  const request = { system, user, temperature, maxTokens, expectJson, signal };

  if (provider === "ollama") {
    // Only pass a context size when the caps were read live from the model.
    const contextTokens = resolved.caps.source === "live" ? resolved.caps.contextTokens : 0;
    return ollamaChat(cfg, { ...request, contextTokens });
  }
  if (provider === "lmstudio") {
    const base = cleanBase(cfg.baseUrl, "http://localhost:1234") + "/v1";
    return openaiCompatChat("lmstudio", base, null, cfg.model, request);
  }
  if (provider === "openai") {
    requireKey(cfg, "OpenAI");
    return openaiCompatChat("openai", "https://api.openai.com/v1", cfg.apiKey, cfg.model, request);
  }
  if (provider === "anthropic") {
    requireKey(cfg, "Anthropic");
    // The Anthropic backend takes no expectJson flag.
    return anthropicChat(cfg, { system, user, temperature, maxTokens, signal });
  }
  if (provider === "google") {
    requireKey(cfg, "Google");
    return googleChat(cfg, request);
  }
  throw new Error(`Unknown provider: ${provider}`);
}
// Guard for cloud providers: throw a user-facing error unless `cfg.apiKey`
// is set. `name` is the provider's display name used in the message.
function requireKey(cfg, name) {
  if (cfg.apiKey) return;
  throw new Error(`No API key set for ${name}. Add it on the Settings page.`);
}
// ---------- streaming helpers ----------
// All backends stream and accumulate server-side. Non-streaming requests sit
// silent until the full response is ready, and Node's fetch kills any request
// whose headers take >5 minutes — which long local generations routinely do.
// Streaming returns headers instantly and each chunk resets the idle timer.
async function* streamLines(res) {
  // Yields each non-empty, trimmed line of the response body as it arrives.
  // Bytes are decoded incrementally, so a multi-byte character or a line
  // split across chunks is reassembled before being yielded.
  const utf8 = new TextDecoder();
  let pending = "";

  for await (const bytes of res.body) {
    const pieces = (pending + utf8.decode(bytes, { stream: true })).split("\n");
    // The final piece has no newline after it yet; hold it for the next chunk.
    pending = pieces.pop();
    for (const piece of pieces) {
      const text = piece.trim();
      if (text) yield text;
    }
  }

  // Flush the decoder and emit whatever followed the last newline.
  const tail = (pending + utf8.decode()).trim();
  if (tail) yield tail;
}
// Parse server-sent events, invoking onEvent for each JSON data payload.
async function readSse(res, onEvent) {
  // Only "data:" lines carry payloads; other lines are ignored, as are empty
  // payloads, the "[DONE]" sentinel, and payloads that are not valid JSON.
  // Errors thrown by onEvent propagate to the caller.
  const PREFIX = "data:";
  for await (const raw of streamLines(res)) {
    if (!raw.startsWith(PREFIX)) continue;

    const text = raw.slice(PREFIX.length).trim();
    if (text === "" || text === "[DONE]") continue;

    let event;
    let parsedOk = true;
    try {
      event = JSON.parse(text);
    } catch {
      parsedOk = false;
    }
    if (parsedOk) onEvent(event);
  }
}
// Map a provider id to its display name. Unknown ids are returned unchanged.
export function providerLabel(p) {
  const labels = {
    ollama: "Ollama",
    lmstudio: "LM Studio",
    openai: "OpenAI",
    anthropic: "Anthropic",
    google: "Google AI",
  };
  // Own-property check: ids such as "constructor" or "toString" would
  // otherwise resolve to members inherited from Object.prototype.
  return Object.hasOwn(labels, p) ? labels[p] : p;
}
// ---------- Ollama ----------
// Chat via Ollama's /api/chat endpoint, streaming NDJSON and accumulating the
// reply text server-side.
//   cfg  - { baseUrl?, model }
//   opts - { system, user, temperature, maxTokens, expectJson, signal, contextTokens }
// Resolves to the concatenated message content. Throws "Ollama error ..." for
// HTTP or in-stream errors, and a friendlyConnError-translated error for
// connection failures.
async function ollamaChat(cfg, { system, user, temperature, maxTokens, expectJson, signal, contextTokens }) {
  const base = cleanBase(cfg.baseUrl, "http://localhost:11434");

  const options = { temperature, num_predict: maxTokens };
  if (contextTokens) {
    // Ollama's default window is small and long prompts get truncated, so
    // request a window sized for this call. chars/3 deliberately
    // over-estimates tokens; the extra 512 leaves room for the chat template.
    const estimate = Math.ceil((system.length + user.length) / 3) + maxTokens + 512;
    const roundedUp = Math.ceil(estimate / 1024) * 1024;
    options.num_ctx = Math.min(contextTokens, Math.max(4096, roundedUp));
  }

  const messages = [];
  if (system) messages.push({ role: "system", content: system });
  messages.push({ role: "user", content: user });

  const payload = { model: cfg.model, stream: true, messages, options };
  if (expectJson) payload.format = "json";

  let res;
  try {
    res = await fetch(base + "/api/chat", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      signal,
      body: JSON.stringify(payload),
    });
  } catch (e) {
    throw friendlyConnError("ollama", base, e);
  }
  if (!res.ok) throw new Error(`Ollama error (${res.status}): ${await readError(res)}`);

  // Each line of the stream is one JSON object; unparseable lines are skipped.
  let reply = "";
  try {
    for await (const line of streamLines(res)) {
      let event;
      try {
        event = JSON.parse(line);
      } catch {
        continue;
      }
      if (event?.error) throw new Error(`Ollama error: ${event.error}`);
      const piece = event?.message?.content;
      if (piece) reply += piece;
    }
  } catch (e) {
    // Errors reported by Ollama itself pass through; anything else is a
    // transport failure and gets the friendly translation.
    if (/^Ollama error:/.test(String(e?.message))) throw e;
    throw friendlyConnError("ollama", base, e);
  }
  return reply;
}
// ---------- OpenAI-compatible (OpenAI + LM Studio) ----------
// Chat against an OpenAI-compatible /chat/completions endpoint (OpenAI itself
// and LM Studio), streaming SSE and accumulating the reply server-side.
//   provider - "openai" or "lmstudio" (used for labels and JSON-mode gating)
//   base     - API base URL ending in /v1
//   apiKey   - bearer token, or null for local servers
//   model    - model id
//   opts     - { system, user, temperature, maxTokens, expectJson, signal }
// Resolves to the reply text. Throws "<Provider> error ..." for HTTP errors
// and for error events delivered inside the stream, and a
// friendlyConnError-translated error for connection failures.
async function openaiCompatChat(provider, base, apiKey, model, { system, user, temperature, maxTokens, expectJson, signal }) {
  const label = providerLabel(provider);
  const headers = { "Content-Type": "application/json" };
  if (apiKey) headers["Authorization"] = "Bearer " + apiKey;

  const body = {
    model,
    stream: true,
    temperature,
    max_tokens: maxTokens,
    messages: [
      ...(system ? [{ role: "system", content: system }] : []),
      { role: "user", content: user },
    ],
  };
  // JSON mode is only requested from OpenAI proper.
  if (expectJson && provider === "openai") body.response_format = { type: "json_object" };

  // `body` is serialized at call time, so the retry below sends the adjusted
  // version.
  const send = async () => {
    try {
      return await fetch(base + "/chat/completions", { method: "POST", headers, signal, body: JSON.stringify(body) });
    } catch (e) {
      throw friendlyConnError(provider, base, e);
    }
  };

  let res = await send();

  // Some models reject response_format, temperature, or streaming; retry once without.
  if (!res.ok) {
    const detail = await readError(res);
    let changed = false;
    if (/response_format|temperature|unsupported|param/i.test(detail) && (body.response_format || body.temperature !== undefined)) {
      delete body.response_format;
      delete body.temperature;
      // The retry also switches the token limit to max_completion_tokens.
      body.max_completion_tokens = body.max_tokens;
      delete body.max_tokens;
      changed = true;
    }
    if (/stream/i.test(detail)) {
      body.stream = false;
      changed = true;
    }
    if (!changed) throw new Error(`${label} error (${res.status}): ${detail}`);
    res = await send();
    if (!res.ok) throw new Error(`${label} error (${res.status}): ${await readError(res)}`);
  }

  // Non-streaming fallback: the whole reply arrives as one JSON document.
  if (!body.stream) {
    const data = await res.json();
    return data?.choices?.[0]?.message?.content ?? "";
  }

  let out = "";
  let streamError = null;
  try {
    await readSse(res, (obj) => {
      // A failure after the headers were sent arrives as an `error` event.
      // Surface it (as the Anthropic and Google backends do) instead of
      // returning a silently truncated or empty reply.
      if (obj?.error) {
        const reason = typeof obj.error === "string" ? obj.error : obj.error?.message || JSON.stringify(obj.error);
        streamError = new Error(`${label} error: ${reason}`);
        throw streamError;
      }
      const delta = obj?.choices?.[0]?.delta;
      if (delta?.content) out += delta.content;
    });
  } catch (e) {
    // Provider-reported errors pass through; anything else is transport.
    if (streamError && e === streamError) throw e;
    throw friendlyConnError(provider, base, e);
  }
  return out;
}
// ---------- Anthropic ----------
// Chat via Anthropic's /v1/messages endpoint, streaming SSE and accumulating
// text deltas server-side.
//   cfg  - { apiKey, model }
//   opts - { system, user, temperature, maxTokens, signal }
// Resolves to the reply text. Throws "Anthropic error ..." for HTTP and
// in-stream errors, and a friendlyConnError-translated error for connection
// failures.
async function anthropicChat(cfg, { system, user, temperature, maxTokens, signal }) {
  const HOST = "api.anthropic.com";

  const payload = { model: cfg.model, max_tokens: maxTokens, temperature, stream: true };
  if (system) payload.system = system;
  payload.messages = [{ role: "user", content: user }];

  // Serializes `payload` at call time, so the retry sends the adjusted version.
  const post = async () => {
    try {
      return await fetch("https://api.anthropic.com/v1/messages", {
        method: "POST",
        headers: { "Content-Type": "application/json", "x-api-key": cfg.apiKey, ...VERSION_HEADER },
        signal,
        body: JSON.stringify(payload),
      });
    } catch (e) {
      throw friendlyConnError("anthropic", HOST, e);
    }
  };

  let res = await post();

  // A 400 that complains about sampling parameters gets one retry without
  // temperature; any other 400 is reported as-is.
  if (!res.ok && res.status === 400 && payload.temperature !== undefined) {
    const detail = await readError(res);
    if (!/temperature|top_p|top_k|sampling/i.test(detail)) {
      throw new Error(`Anthropic error (400): ${detail}`);
    }
    delete payload.temperature;
    res = await post();
  }
  if (!res.ok) throw new Error(`Anthropic error (${res.status}): ${await readError(res)}`);

  let reply = "";
  try {
    await readSse(res, (event) => {
      const kind = event?.type;
      if (kind === "error") {
        throw new Error(`Anthropic error: ${event.error?.message || JSON.stringify(event.error)}`);
      }
      if (kind === "content_block_delta" && event.delta?.type === "text_delta") {
        reply += event.delta.text;
      }
    });
  } catch (e) {
    // Errors reported by the API pass through; the rest are transport failures.
    if (/^Anthropic error:/.test(String(e?.message))) throw e;
    throw friendlyConnError("anthropic", HOST, e);
  }
  return reply;
}
// ---------- Google ----------
// Chat via Google's streamGenerateContent endpoint (SSE), accumulating the
// text parts of the first candidate server-side.
//   cfg  - { apiKey, model }
//   opts - { system, user, temperature, maxTokens, expectJson, signal }
// Resolves to the reply text. Throws "Google AI error ..." for HTTP and
// in-stream errors, and a friendlyConnError-translated error for connection
// failures.
async function googleChat(cfg, { system, user, temperature, maxTokens, expectJson, signal }) {
  const HOST = "generativelanguage.googleapis.com";
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(cfg.model)}:streamGenerateContent?alt=sse&key=${encodeURIComponent(cfg.apiKey)}`;

  const generationConfig = { temperature, maxOutputTokens: maxTokens };
  if (expectJson) generationConfig.responseMimeType = "application/json";

  const payload = {};
  if (system) payload.systemInstruction = { parts: [{ text: system }] };
  payload.contents = [{ role: "user", parts: [{ text: user }] }];
  payload.generationConfig = generationConfig;

  let res;
  try {
    res = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      signal,
      body: JSON.stringify(payload),
    });
  } catch (e) {
    throw friendlyConnError("google", HOST, e);
  }
  if (!res.ok) throw new Error(`Google AI error (${res.status}): ${await readError(res)}`);

  let reply = "";
  try {
    await readSse(res, (event) => {
      if (event?.error) {
        throw new Error(`Google AI error: ${event.error?.message || JSON.stringify(event.error)}`);
      }
      // Parts without text contribute nothing.
      const parts = event?.candidates?.[0]?.content?.parts || [];
      reply += parts.map((part) => part.text || "").join("");
    });
  } catch (e) {
    // Errors reported by the API pass through; the rest are transport failures.
    if (/^Google AI error:/.test(String(e?.message))) throw e;
    throw friendlyConnError("google", HOST, e);
  }
  return reply;
}
// ----------------------------------------------------------------------------
// listModels(settings, provider) -> string[]
// ----------------------------------------------------------------------------
// List the model ids available from one provider, sorted.
//   settings - { providers: { [provider]: cfg } }
//   provider - "ollama" | "lmstudio" | "openai" | "anthropic" | "google"
// Resolves to string[]. Throws a user-facing Error when the provider is
// unreachable, returns an HTTP error, has no models (local servers), lacks a
// required API key, or is unknown. Every request shares one 15-second deadline.
export async function listModels(settings, provider) {
  const cfg = settings.providers?.[provider] || {};
  const deadline = AbortSignal.timeout(15000);

  // GET `url` and parse the JSON body. Connection failures are translated via
  // friendlyConnError; non-2xx responses become "<label> error (status): detail".
  const getJson = async (url, headers, label, shownBase) => {
    let res;
    try {
      res = await fetch(url, headers ? { headers, signal: deadline } : { signal: deadline });
    } catch (e) {
      throw friendlyConnError(provider, shownBase, e);
    }
    if (!res.ok) throw new Error(`${label} error (${res.status}): ${await readError(res)}`);
    return res.json();
  };

  try {
    switch (provider) {
      case "ollama": {
        const base = cleanBase(cfg.baseUrl, "http://localhost:11434");
        const data = await getJson(base + "/api/tags", null, "Ollama", base);
        const names = (data?.models || []).map((m) => m.name).sort();
        if (names.length === 0) {
          throw new Error("Ollama is running but has no models installed. Run e.g. `ollama pull llama3.1:8b` first.");
        }
        return names;
      }
      case "lmstudio": {
        const base = cleanBase(cfg.baseUrl, "http://localhost:1234");
        const data = await getJson(base + "/v1/models", null, "LM Studio", base);
        const ids = (data?.data || []).map((m) => m.id).sort();
        if (ids.length === 0) {
          throw new Error("LM Studio server is running but no model is loaded. Load a model in LM Studio first.");
        }
        return ids;
      }
      case "openai": {
        requireKey(cfg, "OpenAI");
        const data = await getJson(
          "https://api.openai.com/v1/models",
          { Authorization: "Bearer " + cfg.apiKey },
          "OpenAI",
          "api.openai.com"
        );
        // Keep ids with a chat-model prefix and drop the listed non-chat families.
        const isChatFamily = (id) => /^(gpt-|o\d|chatgpt-)/.test(id);
        const isExcluded = (id) => /audio|realtime|tts|whisper|image|embed|moderation|transcribe|search/.test(id);
        return (data?.data || [])
          .map((m) => m.id)
          .filter((id) => isChatFamily(id) && !isExcluded(id))
          .sort();
      }
      case "anthropic": {
        requireKey(cfg, "Anthropic");
        const data = await getJson(
          "https://api.anthropic.com/v1/models?limit=100",
          { "x-api-key": cfg.apiKey, ...VERSION_HEADER },
          "Anthropic",
          "api.anthropic.com"
        );
        return (data?.data || []).map((m) => m.id).sort();
      }
      case "google": {
        requireKey(cfg, "Google");
        const data = await getJson(
          `https://generativelanguage.googleapis.com/v1beta/models?pageSize=100&key=${encodeURIComponent(cfg.apiKey)}`,
          null,
          "Google AI",
          "generativelanguage.googleapis.com"
        );
        // Only models that support generateContent; strip the "models/" prefix.
        return (data?.models || [])
          .filter((m) => (m.supportedGenerationMethods || []).includes("generateContent"))
          .map((m) => String(m.name || "").replace(/^models\//, ""))
          .sort();
      }
      default:
        throw new Error("Unknown provider: " + provider);
    }
  } catch (e) {
    // Deadline errors that reach this point untranslated get a short message.
    if (e?.name === "TimeoutError") throw new Error(`Timed out reaching ${providerLabel(provider)}.`);
    throw e;
  }
}
// ----------------------------------------------------------------------------
// testConnection(settings, provider) -> { ok, message }
// ----------------------------------------------------------------------------
// Probe a provider with a tiny fixed prompt.
//   settings - full settings object; `provider` overrides settings.provider
//              for this call only (the input object is not modified)
// Resolves to { ok: true, message } quoting up to 40 characters of the reply.
// Throws whatever chat() throws, or a dedicated Error when the reply is blank.
export async function testConnection(settings, provider) {
  const reply = await chat(
    { ...settings, provider },
    {
      system: "You are a connection test. Reply with exactly: OK",
      user: "Reply with exactly: OK",
      temperature: 0,
      // Generous budget: a model that spends tokens on hidden reasoning
      // before any visible text would otherwise come back empty even though
      // the connection is fine.
      maxTokens: 2048,
    }
  );

  const text = reply ? reply.trim() : "";
  if (!text) {
    throw new Error(
      "The connection worked but the model returned no text. If this is a reasoning model, it may have used its whole token budget on internal reasoning — try a non-reasoning model, or this is usually safe to ignore."
    );
  }
  return { ok: true, message: `Connected. Model replied: "${text.slice(0, 40)}"` };
}