// lib/providers.js — one interface, five backends.
// All calls are made server-side (no CORS issues, keys never touch the browser).

import { resolveGeneration } from "./model-caps";

const VERSION_HEADER = { "anthropic-version": "2023-06-01" };

/**
 * Pick the base URL to use and strip any trailing slashes from it.
 *
 * @param {string} [url] - User-configured base URL; may be empty.
 * @param {string} [fallback] - Used when `url` is empty.
 * @returns {string|undefined} The trimmed URL without trailing slashes, or
 *   `fallback` unchanged when nothing usable remains after trimming.
 */
function cleanBase(url, fallback) {
  const candidate = (url || fallback || "").trim();
  if (!candidate) return fallback;
  return candidate.replace(/\/+$/, "");
}

/**
 * Extract a short, human-readable error detail from a failed HTTP response.
 *
 * The body is read exactly once as text and then parsed as JSON. A response
 * body can only be consumed once, so trying `res.json()` and then falling back
 * to `res.text()` would always lose non-JSON bodies (HTML error pages, plain
 * text).
 *
 * @param {Response} res - A fetch response (normally with `!res.ok`).
 * @returns {Promise<string>} The provider's message, or up to 200 characters of
 *   the raw body; an empty string when the body cannot be read at all.
 */
async function readError(res) {
  let raw;
  try {
    raw = await res.text();
  } catch {
    // Best effort: an unreadable body simply yields no detail.
    return "";
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return raw.slice(0, 200);
  }

  const detail =
    parsed?.error?.message || parsed?.error || parsed?.message || JSON.stringify(parsed).slice(0, 200);
  // `error` may itself be an object without a `message`; never hand callers
  // something that would render as "[object Object]".
  return typeof detail === "string" ? detail : JSON.stringify(detail).slice(0, 200);
}

/**
 * Translate a low-level fetch/stream failure into a message a user can act on.
 *
 * @param {string} provider - Provider id ("ollama", "lmstudio", "openai", ...).
 * @param {string} base - Base URL or host, shown in the local-provider messages.
 * @param {*} err - The error thrown by fetch or while reading the stream.
 * @returns {*} A new Error (with the original attached as `cause`) when the
 *   failure is recognised as a timeout or connection problem; otherwise `err`
 *   itself, unchanged.
 */
function friendlyConnError(provider, base, err) {
  const msg = String(err?.message || err);
  const cause = String(err?.cause?.code || err?.cause?.message || "");
  const haystack = msg + " " + cause;

  // Node's fetch aborts requests that go quiet too long; without this check a
  // slow (but healthy) local model gets misreported as "not running".
  if (/timeout/i.test(haystack)) {
    return new Error(
      `${providerLabel(provider)} took too long to respond and the connection timed out. The model may still be working — for local models, try a smaller/faster model or wait and retry.`,
      { cause: err }
    );
  }

  // NOTE(review): an error whose message merely contains "aborted" (e.g. a
  // caller-triggered abort) also matches here and is reported as unreachable —
  // confirm that is intended.
  if (/fetch failed|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|aborted|network|terminated|socket/i.test(haystack)) {
    if (provider === "ollama") {
      return new Error(
        `Could not reach Ollama at ${base}. Make sure Ollama is running (open the Ollama app, or run \`ollama serve\`).`,
        { cause: err }
      );
    }
    if (provider === "lmstudio") {
      return new Error(
        `Could not reach LM Studio at ${base}. In LM Studio, open the Developer tab and start the local server.`,
        { cause: err }
      );
    }
    return new Error(`Could not reach the ${provider} API. Check your internet connection. (${msg})`, { cause: err });
  }

  return err;
}

// ----------------------------------------------------------------------------
// chat(settings, opts) -> string
//   opts: { system, user, temperature, maxTokens, expectJson, signal }
//   `signal` is an optional AbortSignal forwarded to every backend's fetch.
//   Throws when no model (or, for hosted providers, no API key) is configured,
//   or when the provider id is unknown.
// ----------------------------------------------------------------------------
export async function chat(settings, opts) {
  const provider = settings.provider;
  const cfg = settings.providers?.[provider] || {};
  const { system = "", user = "", expectJson = false, signal } = opts;

  if (!cfg.model) {
    throw new Error(`No model selected for ${providerLabel(provider)}. Open Settings, pick a model, and save.`);
  }

  // Per-model defaults: auto mode sizes these to the model's real limits.
  const resolved = await resolveGeneration(settings);
  const temperature = opts.temperature ?? resolved.temperature;
  let maxTokens = opts.maxTokens ?? resolved.maxTokens;
  // Never ask for more output than the model reports it can produce.
  if (resolved.caps.maxOutputTokens) maxTokens = Math.min(maxTokens, resolved.caps.maxOutputTokens);

  switch (provider) {
    case "ollama":
      return ollamaChat(cfg, {
        system,
        user,
        temperature,
        maxTokens,
        expectJson,
        signal,
        // Only size num_ctx when we actually know the model's window.
        contextTokens: resolved.caps.source === "live" ? resolved.caps.contextTokens : 0,
      });
    case "lmstudio":
      return openaiCompatChat("lmstudio", cleanBase(cfg.baseUrl, "http://localhost:1234") + "/v1", null, cfg.model, {
        system,
        user,
        temperature,
        maxTokens,
        expectJson,
        signal,
      });
    case "openai":
      requireKey(cfg, "OpenAI");
      return openaiCompatChat("openai", "https://api.openai.com/v1", cfg.apiKey, cfg.model, {
        system,
        user,
        temperature,
        maxTokens,
        expectJson,
        signal,
      });
    case "anthropic":
      requireKey(cfg, "Anthropic");
      return anthropicChat(cfg, { system, user, temperature, maxTokens, signal });
    case "google":
      requireKey(cfg, "Google");
      return googleChat(cfg, { system, user, temperature, maxTokens, expectJson, signal });
    default:
      throw new Error(`Unknown provider: ${provider}`);
  }
}

/**
 * Throw a user-facing error when the provider config has no API key.
 *
 * @param {{apiKey?: string}} cfg - Provider configuration.
 * @param {string} name - Provider name used in the error message.
 */
function requireKey(cfg, name) {
  if (!cfg.apiKey) throw new Error(`No API key set for ${name}. Add it on the Settings page.`);
}

// ---------- streaming helpers ----------
// All backends stream and accumulate server-side. Non-streaming requests sit
// silent until the full response is ready, and Node's fetch kills any request
// whose headers take >5 minutes — which long local generations routinely do.
// Streaming returns headers instantly and each chunk resets the idle timer.

/**
 * Yield the response body one line at a time.
 *
 * Lines are split on "\n", trimmed, and skipped when empty. A trailing
 * fragment without a final newline is yielded once the body ends.
 *
 * @param {Response} res - Response whose `body` can be async-iterated.
 * @yields {string} Each non-empty, trimmed line.
 */
async function* streamLines(res) {
  const decoder = new TextDecoder();
  let pending = "";

  for await (const chunk of res.body) {
    // `stream: true` keeps multi-byte characters split across chunks intact.
    pending += decoder.decode(chunk, { stream: true });

    let newlineAt = pending.indexOf("\n");
    while (newlineAt !== -1) {
      const line = pending.slice(0, newlineAt).trim();
      pending = pending.slice(newlineAt + 1);
      if (line) yield line;
      newlineAt = pending.indexOf("\n");
    }
  }

  // Flush whatever the decoder still holds plus the unterminated last line.
  const tail = (pending + decoder.decode()).trim();
  if (tail) yield tail;
}

// Parse server-sent events, invoking onEvent for each JSON data payload.
/**
 * @param {Response} res - Streaming response to read line by line.
 * @param {(event: any) => void} onEvent - Called with each parsed `data:`
 *   payload. Blank payloads, the "[DONE]" sentinel and payloads that are not
 *   valid JSON are skipped. Anything `onEvent` throws propagates to the caller.
 */
async function readSse(res, onEvent) {
  for await (const line of streamLines(res)) {
    if (!line.startsWith("data:")) continue;
    const payload = line.slice(5).trim();
    if (!payload || payload === "[DONE]") continue;

    let obj;
    try {
      obj = JSON.parse(payload);
    } catch {
      continue;
    }
    onEvent(obj);
  }
}

/**
 * Human-readable name for a provider id.
 *
 * @param {string} p - Provider id.
 * @returns {string} The display label, or `p` itself when the id is unknown.
 */
export function providerLabel(p) {
  const labels = {
    ollama: "Ollama",
    lmstudio: "LM Studio",
    openai: "OpenAI",
    anthropic: "Anthropic",
    google: "Google AI",
  };
  // Own-property check so ids like "constructor" do not resolve to members
  // inherited from Object.prototype.
  return (Object.hasOwn(labels, p) && labels[p]) || p;
}

// ---------- Ollama ----------

/**
 * Send one chat turn to Ollama's /api/chat endpoint and return the streamed text.
 *
 * @param {{baseUrl?: string, model: string}} cfg - Ollama provider config.
 * @param {object} opts - { system, user, temperature, maxTokens, expectJson,
 *   signal, contextTokens }. A falsy `contextTokens` leaves num_ctx unset.
 * @returns {Promise<string>} Concatenated message content.
 * @throws {Error} On connection failure, a non-2xx status, or an error object
 *   in the stream.
 */
async function ollamaChat(cfg, { system, user, temperature, maxTokens, expectJson, signal, contextTokens }) {
  const base = cleanBase(cfg.baseUrl, "http://localhost:11434");
  const options = { temperature, num_predict: maxTokens };

  // Ollama defaults num_ctx to ~4k and silently truncates longer prompts, so
  // size the window to this request, bounded by the model's real maximum.
  // chars/3 over-estimates tokens on purpose; the margin covers chat template.
  if (contextTokens) {
    const needed = Math.ceil((system.length + user.length) / 3) + maxTokens + 512;
    options.num_ctx = Math.min(contextTokens, Math.max(4096, Math.ceil(needed / 1024) * 1024));
  }

  let res;
  try {
    res = await fetch(base + "/api/chat", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      signal,
      body: JSON.stringify({
        model: cfg.model,
        stream: true,
        messages: [
          ...(system ? [{ role: "system", content: system }] : []),
          { role: "user", content: user },
        ],
        options,
        ...(expectJson ? { format: "json" } : {}),
      }),
    });
  } catch (e) {
    throw friendlyConnError("ollama", base, e);
  }
  if (!res.ok) throw new Error(`Ollama error (${res.status}): ${await readError(res)}`);

  // NDJSON stream: one JSON object per line.
  let out = "";
  try {
    for await (const line of streamLines(res)) {
      let obj;
      try {
        obj = JSON.parse(line);
      } catch {
        continue;
      }
      if (obj?.error) throw new Error(`Ollama error: ${obj.error}`);
      if (obj?.message?.content) out += obj.message.content;
    }
  } catch (e) {
    // Errors reported by Ollama itself pass through; anything else is treated
    // as a transport problem.
    if (/^Ollama error:/.test(String(e?.message))) throw e;
    throw friendlyConnError("ollama", base, e);
  }
  return out;
}

// ---------- OpenAI-compatible (OpenAI + LM Studio) ----------

/**
 * Send one chat turn to an OpenAI-style /chat/completions endpoint.
 *
 * @param {string} provider - "openai" or "lmstudio"; used for labels and to
 *   decide whether `response_format` is sent.
 * @param {string} base - API base URL including the version path.
 * @param {string|null} apiKey - Bearer token, or null for keyless local servers.
 * @param {string} model - Model id.
 * @param {object} opts - { system, user, temperature, maxTokens, expectJson, signal }.
 * @returns {Promise<string>} The assistant's text ("" when none was produced).
 * @throws {Error} On connection failure, a non-2xx status that a single retry
 *   cannot fix, or an error event inside the stream.
 */
async function openaiCompatChat(provider, base, apiKey, model, { system, user, temperature, maxTokens, expectJson, signal }) {
  const label = providerLabel(provider);
  const headers = { "Content-Type": "application/json" };
  if (apiKey) headers["Authorization"] = "Bearer " + apiKey;

  const body = {
    model,
    stream: true,
    temperature,
    max_tokens: maxTokens,
    messages: [
      ...(system ? [{ role: "system", content: system }] : []),
      { role: "user", content: user },
    ],
  };
  if (expectJson && provider === "openai") body.response_format = { type: "json_object" };

  // `body` is serialised on every call, so mutations made before a retry are
  // picked up.
  const send = () => fetch(base + "/chat/completions", { method: "POST", headers, signal, body: JSON.stringify(body) });

  let res;
  try {
    res = await send();
  } catch (e) {
    throw friendlyConnError(provider, base, e);
  }

  // Some models reject response_format, temperature, or streaming; retry once without.
  if (!res.ok) {
    const detail = await readError(res);
    let changed = false;
    if (/response_format|temperature|unsupported|param/i.test(detail) && (body.response_format || body.temperature !== undefined)) {
      delete body.response_format;
      delete body.temperature;
      body.max_completion_tokens = body.max_tokens;
      delete body.max_tokens;
      changed = true;
    }
    if (/stream/i.test(detail)) {
      body.stream = false;
      changed = true;
    }
    if (!changed) throw new Error(`${label} error (${res.status}): ${detail}`);

    try {
      res = await send();
    } catch (e) {
      throw friendlyConnError(provider, base, e);
    }
    if (!res.ok) throw new Error(`${label} error (${res.status}): ${await readError(res)}`);
  }

  if (!body.stream) {
    const data = await res.json();
    return data?.choices?.[0]?.message?.content ?? "";
  }

  let out = "";
  try {
    await readSse(res, (obj) => {
      // Surface errors sent inside the stream instead of silently returning
      // truncated text, matching the other backends.
      if (obj?.error) throw new Error(`${label} error: ${obj.error?.message || JSON.stringify(obj.error)}`);
      const delta = obj?.choices?.[0]?.delta;
      if (delta?.content) out += delta.content;
    });
  } catch (e) {
    if (String(e?.message).startsWith(`${label} error:`)) throw e;
    throw friendlyConnError(provider, base, e);
  }
  return out;
}

// ---------- Anthropic ----------

/**
 * Send one chat turn to the Anthropic Messages API and return the streamed text.
 *
 * @param {{apiKey: string, model: string}} cfg - Anthropic provider config.
 * @param {object} opts - { system, user, temperature, maxTokens, signal }.
 * @returns {Promise<string>} Concatenated text deltas.
 * @throws {Error} On connection failure, a non-2xx status, or an error event
 *   in the stream.
 */
async function anthropicChat(cfg, { system, user, temperature, maxTokens, signal }) {
  const body = {
    model: cfg.model,
    max_tokens: maxTokens,
    temperature,
    stream: true,
    ...(system ? { system } : {}),
    messages: [{ role: "user", content: user }],
  };
  const send = () =>
    fetch("https://api.anthropic.com/v1/messages", {
      method: "POST",
      headers: { "Content-Type": "application/json", "x-api-key": cfg.apiKey, ...VERSION_HEADER },
      signal,
      body: JSON.stringify(body),
    });

  let res;
  try {
    res = await send();
  } catch (e) {
    throw friendlyConnError("anthropic", "api.anthropic.com", e);
  }

  // Newer Anthropic models (Opus 4.7+, Fable) reject sampling parameters;
  // retry once without temperature.
  if (!res.ok && res.status === 400 && body.temperature !== undefined) {
    const detail = await readError(res);
    if (/temperature|top_p|top_k|sampling/i.test(detail)) {
      delete body.temperature;
      try {
        res = await send();
      } catch (e) {
        throw friendlyConnError("anthropic", "api.anthropic.com", e);
      }
    } else {
      // The body has already been read, so report it here.
      throw new Error(`Anthropic error (400): ${detail}`);
    }
  }
  if (!res.ok) throw new Error(`Anthropic error (${res.status}): ${await readError(res)}`);

  let out = "";
  try {
    await readSse(res, (obj) => {
      if (obj?.type === "content_block_delta" && obj.delta?.type === "text_delta") out += obj.delta.text;
      if (obj?.type === "error") throw new Error(`Anthropic error: ${obj.error?.message || JSON.stringify(obj.error)}`);
    });
  } catch (e) {
    if (/^Anthropic error:/.test(String(e?.message))) throw e;
    throw friendlyConnError("anthropic", "api.anthropic.com", e);
  }
  return out;
}

// ---------- Google ----------

/**
 * Send one chat turn to the Gemini streamGenerateContent endpoint (SSE mode).
 *
 * @param {{apiKey: string, model: string}} cfg - Google provider config.
 * @param {object} opts - { system, user, temperature, maxTokens, expectJson, signal }.
 * @returns {Promise<string>} Concatenated text of the first candidate's parts.
 * @throws {Error} On connection failure, a non-2xx status, or an error object
 *   in the stream.
 */
async function googleChat(cfg, { system, user, temperature, maxTokens, expectJson, signal }) {
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(cfg.model)}:streamGenerateContent?alt=sse&key=${encodeURIComponent(cfg.apiKey)}`;

  let res;
  try {
    res = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      signal,
      body: JSON.stringify({
        ...(system ? { systemInstruction: { parts: [{ text: system }] } } : {}),
        contents: [{ role: "user", parts: [{ text: user }] }],
        generationConfig: {
          temperature,
          maxOutputTokens: maxTokens,
          ...(expectJson ? { responseMimeType: "application/json" } : {}),
        },
      }),
    });
  } catch (e) {
    throw friendlyConnError("google", "generativelanguage.googleapis.com", e);
  }
  if (!res.ok) throw new Error(`Google AI error (${res.status}): ${await readError(res)}`);

  let out = "";
  try {
    await readSse(res, (obj) => {
      if (obj?.error) throw new Error(`Google AI error: ${obj.error?.message || JSON.stringify(obj.error)}`);
      const parts = obj?.candidates?.[0]?.content?.parts || [];
      out += parts.map((p) => p.text || "").join("");
    });
  } catch (e) {
    if (/^Google AI error:/.test(String(e?.message))) throw e;
    throw friendlyConnError("google", "generativelanguage.googleapis.com", e);
  }
  return out;
}

// Fetch for listModels: connection failures become friendly errors, but a
// TimeoutError is passed through untouched so listModels can report it itself.
async function fetchModelList(provider, base, url, init) {
  try {
    return await fetch(url, init);
  } catch (e) {
    if (e?.name === "TimeoutError") throw e;
    throw friendlyConnError(provider, base, e);
  }
}

// ----------------------------------------------------------------------------
// listModels(settings, provider) -> string[]
//   Returns the sorted model ids the provider offers. Every request shares one
//   15-second timeout; hitting it throws "Timed out reaching <provider>.".
//   Throws for a missing API key, a non-2xx status, an empty local model list,
//   or an unknown provider id.
// ----------------------------------------------------------------------------
export async function listModels(settings, provider) {
  const cfg = settings.providers?.[provider] || {};
  const t = AbortSignal.timeout(15000);

  try {
    if (provider === "ollama") {
      const base = cleanBase(cfg.baseUrl, "http://localhost:11434");
      const res = await fetchModelList("ollama", base, base + "/api/tags", { signal: t });
      if (!res.ok) throw new Error(`Ollama error (${res.status}): ${await readError(res)}`);
      const data = await res.json();
      const models = (data?.models || []).map((m) => m.name).sort();
      if (!models.length) throw new Error("Ollama is running but has no models installed. Run e.g. `ollama pull llama3.1:8b` first.");
      return models;
    }

    if (provider === "lmstudio") {
      const base = cleanBase(cfg.baseUrl, "http://localhost:1234");
      const res = await fetchModelList("lmstudio", base, base + "/v1/models", { signal: t });
      if (!res.ok) throw new Error(`LM Studio error (${res.status}): ${await readError(res)}`);
      const data = await res.json();
      const models = (data?.data || []).map((m) => m.id).sort();
      if (!models.length) throw new Error("LM Studio server is running but no model is loaded. Load a model in LM Studio first.");
      return models;
    }

    if (provider === "openai") {
      requireKey(cfg, "OpenAI");
      const res = await fetchModelList("openai", "api.openai.com", "https://api.openai.com/v1/models", {
        headers: { Authorization: "Bearer " + cfg.apiKey },
        signal: t,
      });
      if (!res.ok) throw new Error(`OpenAI error (${res.status}): ${await readError(res)}`);
      const data = await res.json();
      // Keep chat-capable families only; drop audio, embedding, etc. variants.
      return (data?.data || [])
        .map((m) => m.id)
        .filter((id) => /^(gpt-|o\d|chatgpt-)/.test(id) && !/audio|realtime|tts|whisper|image|embed|moderation|transcribe|search/.test(id))
        .sort();
    }

    if (provider === "anthropic") {
      requireKey(cfg, "Anthropic");
      const res = await fetchModelList("anthropic", "api.anthropic.com", "https://api.anthropic.com/v1/models?limit=100", {
        headers: { "x-api-key": cfg.apiKey, ...VERSION_HEADER },
        signal: t,
      });
      if (!res.ok) throw new Error(`Anthropic error (${res.status}): ${await readError(res)}`);
      const data = await res.json();
      return (data?.data || []).map((m) => m.id).sort();
    }

    if (provider === "google") {
      requireKey(cfg, "Google");
      const res = await fetchModelList(
        "google",
        "generativelanguage.googleapis.com",
        `https://generativelanguage.googleapis.com/v1beta/models?pageSize=100&key=${encodeURIComponent(cfg.apiKey)}`,
        { signal: t }
      );
      if (!res.ok) throw new Error(`Google AI error (${res.status}): ${await readError(res)}`);
      const data = await res.json();
      return (data?.models || [])
        .filter((m) => (m.supportedGenerationMethods || []).includes("generateContent"))
        .map((m) => String(m.name || "").replace(/^models\//, ""))
        .sort();
    }

    throw new Error("Unknown provider: " + provider);
  } catch (e) {
    if (e?.name === "TimeoutError") throw new Error(`Timed out reaching ${providerLabel(provider)}.`);
    throw e;
  }
}

// ----------------------------------------------------------------------------
// testConnection(settings, provider) -> { ok, message }
//   Sends a tiny prompt through chat() using `provider` in place of the
//   configured one. Errors from chat() propagate; an empty reply also throws.
// ----------------------------------------------------------------------------
export async function testConnection(settings, provider) {
  const probe = { ...settings, provider };
  const reply = await chat(probe, {
    system: "You are a connection test. Reply with exactly: OK",
    user: "Reply with exactly: OK",
    temperature: 0,
    // Reasoning models (o-series, gpt-5, gemini-2.5, Claude thinking) spend tokens
    // on hidden reasoning before any visible text, so keep a generous budget here
    // or the reply comes back empty even though the connection is fine.
    maxTokens: 2048,
  });

  if (!reply || !reply.trim()) {
    throw new Error(
      "The connection worked but the model returned no text. If this is a reasoning model, it may have used its whole token budget on internal reasoning — try a non-reasoning model, or this is usually safe to ignore."
    );
  }
  return { ok: true, message: `Connected. Model replied: "${reply.trim().slice(0, 40)}"` };
}