// mr-drews-assignment-creator/lib/providers.js

// lib/providers.js — one interface, five backends.
// All calls are made server-side (no CORS issues, keys never touch the browser).
import { resolveGeneration } from "./model-caps";

// Pins the Anthropic API version. Spread into the headers of every Anthropic
// request made in this file (chat and model listing).
const VERSION_HEADER = { "anthropic-version": "2023-06-01" };

/**
 * Normalise a provider base URL so paths such as "/v1" or "/api/chat" can be
 * appended to it directly.
 *
 * Surrounding whitespace and trailing slashes are removed. When `url` is
 * missing, not a string, or empty after normalisation, the (equally
 * normalised) `fallback` is used instead.
 *
 * @param {string} [url] - User-configured base URL; may be empty or blank.
 * @param {string} [fallback] - Default base URL for the provider.
 * @returns {string|undefined} The cleaned base URL, or `fallback` itself when
 *   neither argument yields a usable value.
 */
function cleanBase(url, fallback) {
  const tidy = (value) => (typeof value === "string" ? value.trim().replace(/\/+$/, "") : "");
  // The fallback goes through the same clean-up so a blank `url` never
  // produces a base with a trailing slash or stray whitespace.
  return tidy(url) || tidy(fallback) || fallback;
}

/**
 * Extract a short, human-readable detail string from a failed HTTP response.
 *
 * The body is read exactly once as text and then parsed as JSON. A response
 * body can only be consumed once, so calling `json()` first and `text()` as a
 * fallback would always lose non-JSON error bodies.
 *
 * @param {{ text: () => Promise<string> }} res - The non-OK fetch response.
 * @returns {Promise<string>} The provider's error message when one is found,
 *   otherwise up to 200 characters of the raw body; "" if the body is unreadable.
 */
async function readError(res) {
  let raw;
  try {
    raw = await res.text();
  } catch {
    // Best effort: the caller already knows the status code, so an unreadable
    // body just means there is no extra detail to add.
    return "";
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON (HTML error page, plain text, empty body): show it verbatim.
    return raw.slice(0, 200);
  }

  const detail = parsed?.error?.message || parsed?.error || parsed?.message;
  if (typeof detail === "string") return detail;
  // Structured errors without a message are serialised rather than being
  // interpolated as "[object Object]" by callers.
  return JSON.stringify(detail ?? parsed).slice(0, 200);
}

/**
 * Translate a low-level fetch/stream failure into a message a user can act on.
 *
 * @param {string} provider - Provider id ("ollama", "lmstudio", "openai", ...).
 * @param {string} base - Base URL or host to show in the message.
 * @param {unknown} err - The error thrown by fetch or while reading the body.
 * @returns {unknown} A new Error (with `cause` set to `err`) for timeouts and
 *   connection failures; otherwise `err` itself, untouched.
 */
function friendlyConnError(provider, base, err) {
  // A caller-triggered abort (via the `signal` handed to fetch) is not a
  // connectivity problem. Return it untouched so callers can still recognise
  // the cancellation by name instead of seeing "Could not reach ...".
  if (err?.name === "AbortError") return err;

  const msg = String(err?.message || err);
  const cause = String(err?.cause?.code || err?.cause?.message || "");
  const haystack = msg + " " + cause;

  // Node's fetch aborts requests that go quiet too long; without this check a
  // slow (but healthy) local model gets misreported as "not running".
  if (/timeout/i.test(haystack)) {
    return new Error(
      `${providerLabel(provider)} took too long to respond and the connection timed out. The model may still be working — for local models, try a smaller/faster model or wait and retry.`,
      { cause: err }
    );
  }

  if (/fetch failed|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|aborted|network|terminated|socket/i.test(haystack)) {
    // Local servers get provider-specific start-up hints; hosted APIs get a
    // generic connectivity message that includes the raw error text.
    if (provider === "ollama") {
      return new Error(
        `Could not reach Ollama at ${base}. Make sure Ollama is running (open the Ollama app, or run \`ollama serve\`).`,
        { cause: err }
      );
    }
    if (provider === "lmstudio") {
      return new Error(
        `Could not reach LM Studio at ${base}. In LM Studio, open the Developer tab and start the local server.`,
        { cause: err }
      );
    }
    return new Error(`Could not reach the ${provider} API. Check your internet connection. (${msg})`, { cause: err });
  }

  // Anything else is not a connection problem; let it through unchanged.
  return err;
}

// ----------------------------------------------------------------------------
// chat(settings, opts) -> string
// opts: { system, user, temperature, maxTokens, expectJson, signal }
// ----------------------------------------------------------------------------
/**
 * Send one system+user prompt to the currently selected provider and return
 * the model's full text reply.
 *
 * @param {object} settings - App settings; `settings.provider` selects the
 *   backend and `settings.providers[provider]` holds its config.
 * @param {object} opts - `{ system, user, temperature, maxTokens, expectJson, signal }`.
 *   `temperature` and `maxTokens` default to the values from `resolveGeneration`.
 * @returns {Promise<string>} The accumulated reply text.
 * @throws {Error} When no model is selected, a required API key is missing,
 *   or the provider is unknown.
 */
export async function chat(settings, opts) {
  const { provider } = settings;
  const cfg = settings.providers?.[provider] || {};
  const { system = "", user = "", expectJson = false, signal } = opts;

  if (!cfg.model) {
    throw new Error(`No model selected for ${providerLabel(provider)}. Open Settings, pick a model, and save.`);
  }

  // Explicit opts win; otherwise use the per-model defaults, and never ask
  // for more output tokens than the model's known cap.
  const generation = await resolveGeneration(settings);
  const temperature = opts.temperature ?? generation.temperature;
  const wanted = opts.maxTokens ?? generation.maxTokens;
  const { caps } = generation;
  const maxTokens = caps.maxOutputTokens ? Math.min(wanted, caps.maxOutputTokens) : wanted;

  const request = { system, user, temperature, maxTokens, expectJson, signal };

  if (provider === "ollama") {
    // Only size num_ctx when the context window came from a live lookup.
    const contextTokens = caps.source === "live" ? caps.contextTokens : 0;
    return ollamaChat(cfg, { ...request, contextTokens });
  }
  if (provider === "lmstudio") {
    const base = cleanBase(cfg.baseUrl, "http://localhost:1234") + "/v1";
    return openaiCompatChat("lmstudio", base, null, cfg.model, request);
  }
  if (provider === "openai") {
    requireKey(cfg, "OpenAI");
    return openaiCompatChat("openai", "https://api.openai.com/v1", cfg.apiKey, cfg.model, request);
  }
  if (provider === "anthropic") {
    requireKey(cfg, "Anthropic");
    return anthropicChat(cfg, { system, user, temperature, maxTokens, signal });
  }
  if (provider === "google") {
    requireKey(cfg, "Google");
    return googleChat(cfg, request);
  }
  throw new Error(`Unknown provider: ${provider}`);
}

/**
 * Guard for hosted providers: fail fast when no API key is configured.
 *
 * @param {object} cfg - Provider config; only `cfg.apiKey` is inspected.
 * @param {string} name - Provider name to show in the error message.
 * @throws {Error} When `cfg.apiKey` is missing or empty.
 */
function requireKey(cfg, name) {
  if (cfg.apiKey) return;
  throw new Error(`No API key set for ${name}. Add it on the Settings page.`);
}

// ---------- streaming helpers ----------
// All backends stream and accumulate server-side. Non-streaming requests sit
// silent until the full response is ready, and Node's fetch kills any request
// whose headers take >5 minutes — which long local generations routinely do.
// Streaming returns headers instantly and each chunk resets the idle timer.
/**
 * Yield the non-empty, trimmed lines of a streamed response body as they
 * become complete.
 *
 * @param {{ body: AsyncIterable<Uint8Array> }} res - Response whose body is
 *   iterated chunk by chunk.
 * @yields {string} One trimmed line at a time; blank lines are skipped.
 */
async function* streamLines(res) {
  const utf8 = new TextDecoder();
  let pending = "";

  for await (const bytes of res.body) {
    // Streaming decode keeps multi-byte characters intact across chunk edges.
    pending += utf8.decode(bytes, { stream: true });

    // Everything before the last newline is complete; the remainder waits
    // for the next chunk.
    const pieces = pending.split("\n");
    pending = pieces.pop();
    for (const piece of pieces) {
      const text = piece.trim();
      if (text) yield text;
    }
  }

  // Flush the decoder and emit a final line that had no trailing newline.
  const tail = (pending + utf8.decode()).trim();
  if (tail) yield tail;
}

// Parse server-sent events, invoking onEvent for each JSON data payload.
/**
 * Consume a server-sent-events response, handing each JSON `data:` payload
 * to `onEvent`.
 *
 * Lines without a `data:` prefix, empty payloads, the `[DONE]` sentinel and
 * payloads that are not valid JSON are skipped. Errors thrown by `onEvent`
 * propagate to the caller.
 *
 * @param {object} res - Streaming response, passed to `streamLines`.
 * @param {(event: any) => void} onEvent - Called once per parsed payload.
 * @returns {Promise<void>}
 */
async function readSse(res, onEvent) {
  const FIELD = "data:";
  // Parsing is isolated so that only JSON errors are swallowed, never
  // exceptions raised by the event handler.
  const decode = (text) => {
    try {
      return { ok: true, value: JSON.parse(text) };
    } catch {
      return { ok: false };
    }
  };

  for await (const line of streamLines(res)) {
    if (!line.startsWith(FIELD)) continue;
    const data = line.slice(FIELD.length).trim();
    if (data === "" || data === "[DONE]") continue;
    const parsed = decode(data);
    if (parsed.ok) onEvent(parsed.value);
  }
}

/**
 * Map a provider id to its display name.
 *
 * @param {string} p - Provider id such as "ollama" or "openai".
 * @returns {string} The display name, or `p` itself when the id is unknown.
 */
export function providerLabel(p) {
  const labels = { ollama: "Ollama", lmstudio: "LM Studio", openai: "OpenAI", anthropic: "Anthropic", google: "Google AI" };
  // Own-property check: ids like "toString" or "constructor" must not resolve
  // to members inherited from Object.prototype.
  return Object.hasOwn(labels, p) ? labels[p] : p;
}

// ---------- Ollama ----------
/**
 * Run one chat turn against an Ollama server and return the accumulated text.
 *
 * The request always streams (NDJSON, one JSON object per line) and the
 * chunks are concatenated here.
 *
 * @param {object} cfg - Provider config (`baseUrl`, `model`).
 * @param {object} request - `{ system, user, temperature, maxTokens, expectJson, signal, contextTokens }`.
 *   A falsy `contextTokens` leaves `num_ctx` unset.
 * @returns {Promise<string>} The model's reply text.
 * @throws {Error} On connection failure, non-OK status, or an `error` object
 *   in the stream.
 */
async function ollamaChat(cfg, { system, user, temperature, maxTokens, expectJson, signal, contextTokens }) {
  const base = cleanBase(cfg.baseUrl, "http://localhost:11434");

  const options = { temperature, num_predict: maxTokens };
  if (contextTokens) {
    // Ollama's default num_ctx (~4k) silently truncates longer prompts, so
    // request a window sized for this call, capped at the model's maximum.
    // chars/3 deliberately over-estimates tokens; the extra 512 covers the
    // chat template.
    const estimate = Math.ceil((system.length + user.length) / 3) + maxTokens + 512;
    const roundedUp = Math.ceil(estimate / 1024) * 1024;
    options.num_ctx = Math.min(contextTokens, Math.max(4096, roundedUp));
  }

  const messages = [];
  if (system) messages.push({ role: "system", content: system });
  messages.push({ role: "user", content: user });

  const payload = { model: cfg.model, stream: true, messages, options };
  if (expectJson) payload.format = "json";

  let res;
  try {
    res = await fetch(base + "/api/chat", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      signal,
      body: JSON.stringify(payload),
    });
  } catch (e) {
    throw friendlyConnError("ollama", base, e);
  }
  if (!res.ok) {
    const detail = await readError(res);
    throw new Error(`Ollama error (${res.status}): ${detail}`);
  }

  let reply = "";
  try {
    for await (const line of streamLines(res)) {
      let chunk;
      try {
        chunk = JSON.parse(line);
      } catch {
        // Skip anything that is not a complete JSON object.
        continue;
      }
      if (chunk?.error) throw new Error(`Ollama error: ${chunk.error}`);
      const piece = chunk?.message?.content;
      if (piece) reply += piece;
    }
  } catch (e) {
    // Errors reported by Ollama itself pass through; everything else is
    // treated as a possible connection problem.
    const reportedByOllama = /^Ollama error:/.test(String(e?.message));
    throw reportedByOllama ? e : friendlyConnError("ollama", base, e);
  }
  return reply;
}

// ---------- OpenAI-compatible (OpenAI + LM Studio) ----------
/**
 * Run one chat turn against an OpenAI-compatible `/chat/completions`
 * endpoint (OpenAI itself or LM Studio) and return the accumulated text.
 *
 * The first attempt streams. If the server rejects the request, the error
 * detail is inspected and the request is retried once:
 *  - a detail mentioning response_format / temperature / unsupported / param
 *    drops `response_format` and `temperature` and sends the token limit as
 *    `max_completion_tokens` instead of `max_tokens`;
 *  - a detail mentioning "stream" turns streaming off.
 *
 * @param {string} provider - "openai" or "lmstudio"; used for labels and to
 *   decide whether `response_format` is sent.
 * @param {string} base - API base URL, including the "/v1" segment.
 * @param {string|null} apiKey - Bearer token, or null for keyless servers.
 * @param {string} model - Model id.
 * @param {object} request - `{ system, user, temperature, maxTokens, expectJson, signal }`.
 * @returns {Promise<string>} The model's reply text.
 * @throws {Error} On connection failure, a non-OK status that the retry does
 *   not resolve, or an `error` payload delivered inside the stream.
 */
async function openaiCompatChat(provider, base, apiKey, model, { system, user, temperature, maxTokens, expectJson, signal }) {
  const headers = { "Content-Type": "application/json" };
  if (apiKey) headers["Authorization"] = "Bearer " + apiKey;

  const body = {
    model,
    stream: true,
    temperature,
    max_tokens: maxTokens,
    messages: [
      ...(system ? [{ role: "system", content: system }] : []),
      { role: "user", content: user },
    ],
  };
  if (expectJson && provider === "openai") body.response_format = { type: "json_object" };

  // `body` is serialised at call time, so the retry picks up the edits below.
  const send = () =>
    fetch(base + "/chat/completions", { method: "POST", headers, signal, body: JSON.stringify(body) });

  let res;
  try {
    res = await send();
  } catch (e) {
    throw friendlyConnError(provider, base, e);
  }

  // Some models reject response_format, temperature, or streaming; retry once without.
  if (!res.ok) {
    const detail = await readError(res);
    let changed = false;
    if (/response_format|temperature|unsupported|param/i.test(detail) && (body.response_format || body.temperature !== undefined)) {
      delete body.response_format;
      delete body.temperature;
      body.max_completion_tokens = body.max_tokens;
      delete body.max_tokens;
      changed = true;
    }
    if (/stream/i.test(detail)) {
      body.stream = false;
      changed = true;
    }
    if (!changed) throw new Error(`${providerLabel(provider)} error (${res.status}): ${detail}`);
    try {
      res = await send();
    } catch (e) {
      throw friendlyConnError(provider, base, e);
    }
    if (!res.ok) throw new Error(`${providerLabel(provider)} error (${res.status}): ${await readError(res)}`);
  }

  if (!body.stream) {
    let data;
    try {
      data = await res.json();
    } catch (e) {
      // A dropped connection while reading the body gets the same friendly
      // treatment as one dropped mid-stream; other errors pass through.
      throw friendlyConnError(provider, base, e);
    }
    return data?.choices?.[0]?.message?.content ?? "";
  }

  let out = "";
  // Remembers an error reported inside the stream so the catch below can tell
  // it apart from transport failures without matching on message text.
  let streamError = null;
  try {
    await readSse(res, (obj) => {
      if (obj?.error) {
        // Surface in-stream errors instead of silently returning partial
        // text, matching how the Anthropic and Google readers behave.
        const detail =
          obj.error?.message || (typeof obj.error === "string" ? obj.error : JSON.stringify(obj.error));
        streamError = new Error(`${providerLabel(provider)} error: ${detail}`);
        throw streamError;
      }
      const delta = obj?.choices?.[0]?.delta;
      if (delta?.content) out += delta.content;
    });
  } catch (e) {
    if (streamError !== null && e === streamError) throw e;
    throw friendlyConnError(provider, base, e);
  }
  return out;
}

// ---------- Anthropic ----------
/**
 * Run one chat turn against the Anthropic Messages API and return the
 * accumulated text.
 *
 * The request streams. If the API answers 400 with a detail that mentions a
 * sampling parameter, the request is retried once without `temperature`.
 *
 * @param {object} cfg - Provider config (`apiKey`, `model`).
 * @param {object} request - `{ system, user, temperature, maxTokens, signal }`.
 * @returns {Promise<string>} Concatenated `text_delta` chunks.
 * @throws {Error} On connection failure, a non-OK status, or an `error`
 *   event in the stream.
 */
async function anthropicChat(cfg, { system, user, temperature, maxTokens, signal }) {
  const host = "api.anthropic.com";

  const payload = { model: cfg.model, max_tokens: maxTokens, temperature, stream: true };
  if (system) payload.system = system;
  payload.messages = [{ role: "user", content: user }];

  // Serialises `payload` on every call so the retry sees the edited version.
  const post = async () => {
    try {
      return await fetch("https://api.anthropic.com/v1/messages", {
        method: "POST",
        headers: { "Content-Type": "application/json", "x-api-key": cfg.apiKey, ...VERSION_HEADER },
        signal,
        body: JSON.stringify(payload),
      });
    } catch (e) {
      throw friendlyConnError("anthropic", host, e);
    }
  };

  let res = await post();

  // Some models reject sampling parameters; when a 400 says so, try once
  // more without temperature. Any other 400 is reported straight away.
  const mayBeSamplingIssue = !res.ok && res.status === 400 && payload.temperature !== undefined;
  if (mayBeSamplingIssue) {
    const detail = await readError(res);
    if (!/temperature|top_p|top_k|sampling/i.test(detail)) {
      throw new Error(`Anthropic error (400): ${detail}`);
    }
    delete payload.temperature;
    res = await post();
  }
  if (!res.ok) {
    const detail = await readError(res);
    throw new Error(`Anthropic error (${res.status}): ${detail}`);
  }

  let reply = "";
  const handleEvent = (event) => {
    const kind = event?.type;
    if (kind === "content_block_delta" && event.delta?.type === "text_delta") {
      reply += event.delta.text;
    } else if (kind === "error") {
      throw new Error(`Anthropic error: ${event.error?.message || JSON.stringify(event.error)}`);
    }
  };

  try {
    await readSse(res, handleEvent);
  } catch (e) {
    // API-reported errors pass through; anything else may be a dropped
    // connection and gets the friendly treatment.
    const reportedByApi = /^Anthropic error:/.test(String(e?.message));
    throw reportedByApi ? e : friendlyConnError("anthropic", host, e);
  }
  return reply;
}

// ---------- Google ----------
/**
 * Run one chat turn against the Google Generative Language API
 * (`streamGenerateContent`, SSE) and return the accumulated text.
 *
 * @param {object} cfg - Provider config (`apiKey`, `model`).
 * @param {object} request - `{ system, user, temperature, maxTokens, expectJson, signal }`.
 * @returns {Promise<string>} Text parts of the first candidate, concatenated
 *   across all stream events.
 * @throws {Error} On connection failure, a non-OK status, or an `error`
 *   payload in the stream.
 */
async function googleChat(cfg, { system, user, temperature, maxTokens, expectJson, signal }) {
  const host = "generativelanguage.googleapis.com";
  const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(cfg.model)}:streamGenerateContent?alt=sse&key=${encodeURIComponent(cfg.apiKey)}`;

  const generationConfig = { temperature, maxOutputTokens: maxTokens };
  if (expectJson) generationConfig.responseMimeType = "application/json";

  const payload = {};
  if (system) payload.systemInstruction = { parts: [{ text: system }] };
  payload.contents = [{ role: "user", parts: [{ text: user }] }];
  payload.generationConfig = generationConfig;

  let res;
  try {
    res = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      signal,
      body: JSON.stringify(payload),
    });
  } catch (e) {
    throw friendlyConnError("google", host, e);
  }
  if (!res.ok) {
    const detail = await readError(res);
    throw new Error(`Google AI error (${res.status}): ${detail}`);
  }

  let reply = "";
  const handleEvent = (event) => {
    if (event?.error) {
      throw new Error(`Google AI error: ${event.error?.message || JSON.stringify(event.error)}`);
    }
    // Only the first candidate is read; parts without text contribute "".
    const parts = event?.candidates?.[0]?.content?.parts || [];
    const texts = parts.map((part) => part.text || "");
    reply += texts.join("");
  };

  try {
    await readSse(res, handleEvent);
  } catch (e) {
    // API-reported errors pass through; anything else may be a dropped
    // connection and gets the friendly treatment.
    const reportedByApi = /^Google AI error:/.test(String(e?.message));
    throw reportedByApi ? e : friendlyConnError("google", host, e);
  }
  return reply;
}

// ----------------------------------------------------------------------------
// listModels(settings, provider) -> string[]
// ----------------------------------------------------------------------------
/**
 * Fetch the model ids a provider currently offers.
 *
 * Every request shares one 15-second timeout. A timeout — whether it fires
 * while connecting or while reading the body — is reported as
 * "Timed out reaching <provider>." rather than being rewritten into the
 * chat-oriented "model may still be working" message.
 *
 * @param {object} settings - App settings; `settings.providers[provider]`
 *   supplies `baseUrl` / `apiKey`.
 * @param {string} provider - "ollama", "lmstudio", "openai", "anthropic" or "google".
 * @returns {Promise<string[]>} Model ids, sorted.
 * @throws {Error} On a missing API key, connection failure, timeout, non-OK
 *   status, an unknown provider, or a local server with no models available.
 */
export async function listModels(settings, provider) {
  const cfg = settings.providers?.[provider] || {};
  const t = AbortSignal.timeout(15000);

  // GET `url` and return the parsed JSON body. `base` is only used for the
  // connection-error message.
  const getJson = async (url, base, init = {}) => {
    let res;
    try {
      res = await fetch(url, { ...init, signal: t });
    } catch (e) {
      // Leave our own timeout alone so the outer handler can report it;
      // friendlyConnError would otherwise swap in its generic timeout text.
      if (e?.name === "TimeoutError") throw e;
      throw friendlyConnError(provider, base, e);
    }
    if (!res.ok) throw new Error(`${providerLabel(provider)} error (${res.status}): ${await readError(res)}`);
    return await res.json();
  };

  try {
    if (provider === "ollama") {
      const base = cleanBase(cfg.baseUrl, "http://localhost:11434");
      const data = await getJson(base + "/api/tags", base);
      const models = (data?.models || []).map((m) => m.name).sort();
      if (!models.length) throw new Error("Ollama is running but has no models installed. Run e.g. `ollama pull llama3.1:8b` first.");
      return models;
    }
    if (provider === "lmstudio") {
      const base = cleanBase(cfg.baseUrl, "http://localhost:1234");
      const data = await getJson(base + "/v1/models", base);
      const models = (data?.data || []).map((m) => m.id).sort();
      if (!models.length) throw new Error("LM Studio server is running but no model is loaded. Load a model in LM Studio first.");
      return models;
    }
    if (provider === "openai") {
      requireKey(cfg, "OpenAI");
      const data = await getJson("https://api.openai.com/v1/models", "api.openai.com", {
        headers: { Authorization: "Bearer " + cfg.apiKey },
      });
      // Keep chat-capable families and drop ids for other modalities/tools.
      return (data?.data || [])
        .map((m) => m.id)
        .filter((id) => /^(gpt-|o\d|chatgpt-)/.test(id) && !/audio|realtime|tts|whisper|image|embed|moderation|transcribe|search/.test(id))
        .sort();
    }
    if (provider === "anthropic") {
      requireKey(cfg, "Anthropic");
      const data = await getJson("https://api.anthropic.com/v1/models?limit=100", "api.anthropic.com", {
        headers: { "x-api-key": cfg.apiKey, ...VERSION_HEADER },
      });
      return (data?.data || []).map((m) => m.id).sort();
    }
    if (provider === "google") {
      requireKey(cfg, "Google");
      const data = await getJson(
        `https://generativelanguage.googleapis.com/v1beta/models?pageSize=100&key=${encodeURIComponent(cfg.apiKey)}`,
        "generativelanguage.googleapis.com"
      );
      return (data?.models || [])
        .filter((m) => (m.supportedGenerationMethods || []).includes("generateContent"))
        .map((m) => String(m.name || "").replace(/^models\//, ""))
        .sort();
    }
    throw new Error("Unknown provider: " + provider);
  } catch (e) {
    if (e?.name === "TimeoutError") throw new Error(`Timed out reaching ${providerLabel(provider)}.`, { cause: e });
    throw e;
  }
}

// ----------------------------------------------------------------------------
// testConnection(settings, provider) -> { ok, message }
// ----------------------------------------------------------------------------
/**
 * Check that a provider is reachable and produces text by asking it for a
 * one-word reply.
 *
 * @param {object} settings - App settings; `provider` below overrides
 *   `settings.provider` for this probe.
 * @param {string} provider - Provider id to test.
 * @returns {Promise<{ ok: true, message: string }>} Success summary quoting
 *   up to 40 characters of the reply.
 * @throws {Error} Whatever `chat` throws, or an explanatory error when the
 *   reply is empty or whitespace.
 */
export async function testConnection(settings, provider) {
  const probeSettings = { ...settings, provider };
  const probeRequest = {
    system: "You are a connection test. Reply with exactly: OK",
    user: "Reply with exactly: OK",
    temperature: 0,
    // Reasoning models can spend tokens on hidden reasoning before emitting
    // any visible text; a generous budget keeps the reply from coming back
    // empty on an otherwise healthy connection.
    maxTokens: 2048,
  };

  const reply = await chat(probeSettings, probeRequest);
  const text = reply ? reply.trim() : "";
  if (text) {
    return { ok: true, message: `Connected. Model replied: "${text.slice(0, 40)}"` };
  }
  throw new Error(
    "The connection worked but the model returned no text. If this is a reasoning model, it may have used its whole token budget on internal reasoning — try a non-reasoning model, or this is usually safe to ignore."
  );
}