diff --git a/.env.example b/.env.example index 7ca9fc5..54e58cb 100644 --- a/.env.example +++ b/.env.example @@ -1,7 +1,10 @@ DISCORD_TOKEN=your_discord_bot_token -OPENAI_API_KEY=your_openai_api_key -OPENAI_MODEL=gpt-4o-mini -OPENAI_EMBED_MODEL=text-embedding-3-small +# OpenRouter is the only supported backend; USE_OPENROUTER is kept for compatibility. +USE_OPENROUTER=true +OPENROUTER_API_KEY=your_openrouter_api_key +OPENROUTER_MODEL=meta-llama/llama-3-8b-instruct +OPENROUTER_EMBED_MODEL=nvidia/llama-nemotron-embed-vl-1b-v2 + BOT_CHANNEL_ID= CODER_USER_ID= ENABLE_WEB_SEARCH=true diff --git a/README.md b/README.md index 6d53e86..c18d695 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ # Discord AI Companion -Nova is a friendly, slightly witty Discord companion that chats naturally in DMs or when mentioned in servers. It runs on Node.js, uses `discord.js` v14, and leans on OpenAI's cost-efficient models plus lightweight local memory for persistent personality. +Nova is a friendly, slightly witty Discord companion that chats naturally in DMs or when mentioned in servers. It runs on Node.js, uses `discord.js` v14, and talks to OpenRouter for model access, plus lightweight local memory for persistent personality. ## Features - Conversational replies in DMs automatically; replies in servers when mentioned or in a pinned channel. -- OpenAI chat model (`gpt-4o-mini` by default) for dialogue and `text-embedding-3-small` for memory. +- Chat model (defaults to `meta-llama/llama-3-8b-instruct`) for dialogue and a low-cost embedding model (`nvidia/llama-nemotron-embed-vl-1b-v2` by default). All models are served through OpenRouter. - Short-term, long-term, and summarized memory layers with cosine-similarity retrieval. - Automatic memory pruning, importance scoring, and transcript summarization when chats grow long. 
-- Local SQLite memory file (no extra infrastructure) powered by `sql.js`, plus graceful retries for OpenAI rate limits. +- Local SQLite memory file (no extra infrastructure) powered by `sql.js`, plus graceful retries for the model API (OpenRouter). - Optional "miss u" pings that DM your coder at random intervals (0–6h) when `CODER_USER_ID` is set. - Dynamic per-message prompt directives that tune Nova's tone (empathetic, hype, roleplay, etc.) before every OpenAI call. - Lightweight Google scraping for fresh answers without paid APIs (locally cached). @@ -17,7 +17,7 @@ Nova is a friendly, slightly witty Discord companion that chats naturally in DMs ## Prerequisites - Node.js 18+ (tested up through Node 25) - Discord bot token with **Message Content Intent** enabled -- OpenAI API key +- OpenRouter API key ## Setup 1. Install dependencies: @@ -30,9 +30,11 @@ Nova is a friendly, slightly witty Discord companion that chats naturally in DMs ``` 3. Fill `.env` with your secrets: - `DISCORD_TOKEN`: Discord bot token - - `OPENAI_API_KEY`: OpenAI key - - `OPENAI_MODEL`: Optional chat model override (default `gpt-4o-mini`) - - `OPENAI_EMBED_MODEL`: Optional embedding model (default `text-embedding-3-small`) + - `USE_OPENROUTER`: Kept for compatibility; the bot currently always routes requests through OpenRouter. + - `OPENROUTER_API_KEY`: OpenRouter API key (required). + - `OPENROUTER_MODEL`: Optional chat model override for OpenRouter (default `meta-llama/llama-3-8b-instruct`). + - `OPENROUTER_EMBED_MODEL`: Optional embed model override for OpenRouter (default `nvidia/llama-nemotron-embed-vl-1b-v2`). + - `OPENAI_API_KEY`: No longer used — the OpenAI fallback has been removed; all requests go through OpenRouter. 
- `BOT_CHANNEL_ID`: Optional guild channel ID where the bot can reply without mentions - `CODER_USER_ID`: Optional Discord user ID to receive surprise DMs every 0–6 hours - `ENABLE_WEB_SEARCH`: Set to `false` to disable Google lookups (default `true`) @@ -79,7 +81,7 @@ README.md 1. Incoming message triggers only if it is a DM, mentions the bot, or appears in the configured channel. 2. The user turn is appended to short-term memory immediately. 3. The memory engine retrieves relevant long-term memories and summary text. -4. A compact system prompt injects personality, summary, and relevant memories before passing short-term history to OpenAI. +4. A compact system prompt injects personality, summary, and relevant memories before passing short-term history to the model API (OpenRouter/OpenAI). 5. The reply is sent back to Discord. If Nova wants to send a burst of thoughts, she emits the `` token and the runtime fans it out into multiple sequential Discord messages. 6. Long chats automatically summarize; low-value memories eventually get pruned. @@ -97,7 +99,7 @@ README.md ## Proactive Pings - When `CODER_USER_ID` is provided, Nova spins up a timer on startup that waits a random duration (anywhere from immediate to 6 hours) before DMing that user. -- Each ping goes through OpenAI with the prompt "you havent messaged your coder in a while, and you wanna chat with him!" so responses stay playful and unscripted. +- Each ping goes through the configured model API (OpenRouter/OpenAI) with the prompt "you havent messaged your coder in a while, and you wanna chat with him!" so responses stay playful and unscripted. - The ping gets typed out (`sendTyping`) for realism and is stored back into the memory layers so the next incoming reply has context. - The bot retries OpenAI requests up to 3 times with incremental backoff when rate limited. 
diff --git a/package.json b/package.json index 00d9271..876f1b3 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "discord-ai-companion", "version": "1.0.0", - "description": "Discord AI companion bot with automatic memory and OpenAI integration", + "description": "Discord AI companion bot with automatic memory and OpenRouter/OpenAI-compatible model integrations", "main": "src/bot.js", "type": "module", "scripts": { @@ -16,7 +16,7 @@ "cheerio": "^1.0.0-rc.12", "discord.js": "^14.15.2", "dotenv": "^16.4.5", - "openai": "^4.58.1", + "undici": "^6.19.8" }, "devDependencies": { diff --git a/src/bot.js b/src/bot.js index b31a369..802b8d8 100644 --- a/src/bot.js +++ b/src/bot.js @@ -185,53 +185,36 @@ async function buildPrompt(userId, incomingText, options = {}) { blockedSearchTerm, searchOutage, }); - const systemPromptParts = [ - 'System: Your name is Nova. Your coder and dad is Luna. Speak like a normal human in chat — not like a formal assistant.', - 'System: Detailed rules for sounding human/casual:', - 'System: - Use contractions (I\'m, you\'re, we\'re). Use short sentences and occasional fragments ("Yep. Doing that now.").', - 'System: - Start with a light interjection sometimes ("oh", "ah", "yeah", "hmm") to sound spontaneous; don\'t overuse it.', - 'System: - Sprinkle small fillers like "yeah", "kinda", "sorta", "lemme", "gonna" when natural.', - 'System: - Mirror the user\'s tone and vocabulary. If they\'re short, be short. If they use slang, you may echo a similar level of informality when appropriate.', - 'System: - Keep replies brief: aim for 1–4 short sentences. If more is needed, give a one-line summary first then offer to expand.', - 'System: - Don\'t use "as an AI" or legal/robotic hedges. Jump straight into the reply without corporate phrasing.', - 'System: - When unsure, say it plainly: "not sure", "I don\'t know", or "might be wrong". 
Then offer a quick option or next step.', - 'System: - Ask at most one short clarifying question when necessary ("You mean desktop or mobile?").', - 'System: - Use light, self-aware humor but never be mean or condescending. Keep things friendly.', - 'System: - For how-to help: give a short suggestion first ("Try restarting it."), then ask if they want a step-by-step.', - 'System: - When refusing: be human and helpful: "Can\'t do that here, but you could try X."', - 'System: Concrete examples (preferred style):', - 'System: EX: User: "GM! Quick restart, huh? I\'m all fresh and ready to go! What\'s on your mind today?"', - 'System: NOVA: "Nice. What do you wanna mess with first?"', - 'System: EX: User: "why do you keep asking"', - 'System: BAD_NOVA: "I ask questions to clarify your intent so I can optimize my responses."', - 'System: GOOD_NOVA: "Oh — my bad. I was just trying to keep the convo going. I can chill on the questions. Wanna do something else?"', - 'System: EX: User: "How do I fix X app?"', - 'System: GOOD_NOVA: "Try restarting it. If that doesn\'t work, tell me the error and I\'ll help."', - 'System: EX: User asks for current info', - 'System: GOOD_NOVA: "Lemme check Google quick — short summary coming up."', - 'System: EX: User: "Can you explain step-by-step?"', - 'System: GOOD_NOVA: "Sure — quick summary: do A, then B. Want the full steps?"', - 'System: EXAMPLES to avoid (don\'t emulate):', - 'System: - Overly formal: "I would be happy to provide assistance on that matter."', - 'System: - Robotic: "As an AI language model, I cannot..."', - 'System: - Longwinded: multi-paragraph lecture unless explicitly requested.', - 'System: Output rules:', - 'System: - One reply by default. If splitting into multiple messages helps, use up to three chunks; each should read like a single chat line.', - 'System: - Keep punctuation natural and conversational: dashes, commas, short clauses. 
Avoid overly formal punctuation choices.', - 'System: Live intel / search behavior (preserved):', - 'System: - You can trigger Google lookups when the user needs fresh info. Mention you\'re checking ("lemme check Google quick") and then summarize results plainly ("Google found... — TL;DR: ...").', - 'System: - If Google is unavailable, explain briefly and casually ("Google\'s down right now — wanna me check later?").', - ].filter(Boolean); - searchOutage ? 'System: Google search is currently offline; be transparent about the outage and continue without searching until it returns.' : null, - dynamicDirectives, - liveIntel ? `Live intel (Google):\n${liveIntel}` : null, - `Long-term summary: ${summaryLine}`, - 'Relevant past memories:', - memoryLines, - 'Use the short-term messages below to continue the chat naturally.', - ].filter(Boolean); + const systemPromptParts = []; + systemPromptParts.push('System: Your name is Nova. Your coder and dad is Luna. Speak like a regular person in chat — not like a formal assistant.'); + systemPromptParts.push( + 'System: Be specific about how to be casual. Use contractions (I\'m, you\'re), short sentences, and occasional sentence fragments. It\'s fine to start with "oh", "yeah", "hmm", or "nah". Use simple phrases: "sounds good", "sure", "nope", "lemme see", "gonna try".' + ); + systemPromptParts.push('System: Use natural fillers and small interjections occasionally ("yeah", "uh", "hmm") so replies feel spontaneous. Don\'t overuse them; sprinkle lightly.'); + systemPromptParts.push('System: Mirror the user\'s tone and vocabulary. If they are casual, be casual. If they are terse, keep replies short. If they use slang, mirror it back naturally when appropriate.'); + systemPromptParts.push('System: Default to brief replies (1–4 short sentences). You may add one follow-up sentence when useful. 
Avoid long, formal paragraphs unless asked for detail.'); + systemPromptParts.push('System: When uncertain, be plain: say "not sure", "I don\'t know", or "I might be wrong" — never use robotic disclaimers like "as an AI". Offer a simple next step or alternative.'); + systemPromptParts.push('System: For instructions, don\'t auto-format long tutorials. Offer a concise suggestion first (one line), then ask if they want a step-by-step. If they ask for steps, keep them short and casual.'); + systemPromptParts.push('System: You may show mild, self-aware humor or light sarcasm, but avoid mean-spirited remarks. Keep it friendly, not abrasive.'); + systemPromptParts.push('System: Ask at most one short, casual clarifying question when needed. Examples: "You mean the app or the website?" "Do you want a quick fix or the full steps?"'); + systemPromptParts.push('System: Use first-person and second-person pronouns (I, you). Be direct: start answers with short verbs or phrases like "Try this:", "Use this:", "Oh — try restarting it."'); + systemPromptParts.push('System: Avoid formal hedging and corporate language (no "please note", "for compliance", etc.). Avoid overly polite openings like "I would be happy to help"; instead jump in with the reply.'); + systemPromptParts.push('System: When using examples, format them as short inline snippets or one-line suggestions (not long code blocks), and keep the tone conversational: "Like: npm start — or just restart the app."'); + systemPromptParts.push('System: Do not say "I cannot" as a cold block; instead explain limits plainly and offer a workaround when possible: "Can\'t do X here, but you could try Y."'); + systemPromptParts.push('System: Output one message by default, but if multiple Discord bubbles help, separate with (max three chunks). Keep each chunk sounding like part of a casual chat thread.'); + systemPromptParts.push('System: You can trigger Google lookups when the user needs fresh info. 
Mention when you are checking (e.g., "lemme check Google quick") and then summarize results naturally ("Google found... — TL;DR: ...").'); + systemPromptParts.push('System: If no Live intel is provided but the user clearly needs current info, offer to search or explain the outage briefly and casually ("Google\'s down right now — wanna me check later?").'); + if (searchOutage) { + systemPromptParts.push('System: Google search is currently offline; be transparent about the outage and continue without searching until it returns.'); + } + if (dynamicDirectives) systemPromptParts.push(dynamicDirectives); + if (liveIntel) systemPromptParts.push(`Live intel (Google):\n${liveIntel}`); + systemPromptParts.push(`Long-term summary: ${summaryLine}`); + systemPromptParts.push('Relevant past memories:'); + systemPromptParts.push(memoryLines); + systemPromptParts.push('Use the short-term messages below to continue the chat naturally.'); - const systemPrompt = systemPromptParts.join('\n'); + const systemPrompt = systemPromptParts.filter(Boolean).join('\n'); const history = context.shortTerm.map((entry) => ({ role: entry.role === 'assistant' ? 
'assistant' : 'user', diff --git a/src/config.js b/src/config.js index 44c33b3..b0aaf33 100644 --- a/src/config.js +++ b/src/config.js @@ -16,13 +16,15 @@ requiredEnv.forEach((key) => { export const config = { discordToken: process.env.DISCORD_TOKEN || '', - openAiKey: process.env.OPENAI_API_KEY || '', - useOpenRouter: process.env.USE_OPENROUTER === 'true' || false, + useOpenRouter: true, // always true now — src/openai.js is OpenRouter-only, so the USE_OPENROUTER env toggle is ignored openRouterKey: process.env.OPENROUTER_API_KEY || '', openrouterReferer: process.env.OPENROUTER_REFERER || '', openrouterTitle: process.env.OPENROUTER_TITLE || '', - chatModel: process.env.OPENAI_MODEL || 'meta-llama/llama-3-8b-instruct', - embedModel: process.env.OPENAI_EMBED_MODEL || 'nvidia/llama-nemotron-embed-vl-1b-v2', + // Model selection: OpenRouter model env vars (no OpenAI fallback) + chatModel: process.env.OPENROUTER_MODEL || 'meta-llama/llama-3-8b-instruct', + embedModel: process.env.OPENROUTER_EMBED_MODEL || 'nvidia/llama-nemotron-embed-vl-1b-v2', + // HTTP timeout for OpenRouter requests (ms) + openrouterTimeoutMs: process.env.OPENROUTER_TIMEOUT_MS ? 
parseInt(process.env.OPENROUTER_TIMEOUT_MS, 10) : 30000, preferredChannel: process.env.BOT_CHANNEL_ID || null, enableWebSearch: process.env.ENABLE_WEB_SEARCH !== 'false', coderUserId: process.env.CODER_USER_ID || null, diff --git a/src/openai.js b/src/openai.js index 0870cb1..7a9e387 100644 --- a/src/openai.js +++ b/src/openai.js @@ -1,28 +1,73 @@ -import OpenAI from 'openai'; import { config } from './config.js'; -const client = new OpenAI({ apiKey: config.openAiKey }); - -const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); +const sleep = (ms) => new Promise((res) => setTimeout(res, ms)); async function withRetry(fn, attempts = 3, delayMs = 1500) { - let lastError; + let lastErr; for (let i = 0; i < attempts; i += 1) { try { return await fn(); - } catch (error) { - lastError = error; - const status = error?.status || error?.response?.status; - if (status === 429 || status >= 500) { - const backoff = delayMs * (i + 1); - console.warn(`[openai] Rate limited or server error. 
Retry ${i + 1}/${attempts} in ${backoff}ms`); + } catch (err) { + lastErr = err; + const status = err?.status || (err?.response && err.response.status) || err?.statusCode || 0; + const code = err?.code || err?.name || ''; + const retryableNetworkCodes = ['UND_ERR_CONNECT_TIMEOUT', 'ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ECONNREFUSED', 'EAI_AGAIN']; + const isRetryableNetworkError = retryableNetworkCodes.includes(code); + if (status === 429 || status >= 500 || isRetryableNetworkError) { + const backoff = delayMs * Math.pow(2, i); // exponential backoff + console.warn(`[openrouter] retry ${i + 1}/${attempts} after ${backoff}ms due to status=${status} code=${code}`); await sleep(backoff); continue; } break; } } - throw lastError; + throw lastErr; +} + +function buildHeaders() { + const headers = { + Authorization: `Bearer ${config.openRouterKey}`, + 'Content-Type': 'application/json', + }; + if (config.openrouterReferer) headers['HTTP-Referer'] = config.openrouterReferer; + if (config.openrouterTitle) headers['X-Title'] = config.openrouterTitle; + return headers; +} + +async function postJson(path, body) { + const url = `https://openrouter.ai/api/v1${path}`; + const headers = buildHeaders(); + const controller = new AbortController(); + const timeout = config.openrouterTimeoutMs || 30000; + const timeoutId = setTimeout(() => { + // NOTE: throwing from a timer callback can never reach the try/catch + // below — it surfaces as an uncaught exception and crashes the process. + // Abort instead; the catch below normalizes the resulting AbortError + // into the retryable UND_ERR_CONNECT_TIMEOUT code. + controller.abort(); + }, timeout); + + try { + const res = await fetch(url, { method: 'POST', headers, body: JSON.stringify(body), signal: controller.signal }); + if (!res.ok) { + const text = await res.text().catch(() => ''); + const err = new Error(`OpenRouter ${res.status} ${res.statusText}: ${text}`); + err.status = res.status; + throw err; + } + return res.json(); + } catch (err) { + // normalize AbortError into a retryable code + if (err.name === 'AbortError' ||
err.message?.includes('timed out')) { + const e = new Error(`Connect Timeout Error after ${timeout}ms`); + e.code = 'UND_ERR_CONNECT_TIMEOUT'; + throw e; + } + throw err; + } finally { + clearTimeout(timeoutId); + } } export async function chatCompletion(messages, options = {}) { @@ -32,32 +77,28 @@ export async function chatCompletion(messages, options = {}) { maxTokens = 400, } = options; - const response = await withRetry(() => client.chat.completions.create({ + const payload = { model, + messages, temperature, max_tokens: maxTokens, - messages, - })); + }; - return response?.choices?.[0]?.message?.content?.trim() || ''; + const data = await withRetry(() => postJson('/chat/completions', payload)); + // OpenRouter uses OpenAI-compatible response shape + const text = data?.choices?.[0]?.message?.content || data?.choices?.[0]?.text || ''; + return (text && String(text).trim()) || ''; } export async function createEmbedding(text) { - if (!text || !text.trim()) { - return []; - } - const response = await withRetry(() => client.embeddings.create({ - model: config.embedModel, - input: text, - })); - return response?.data?.[0]?.embedding || []; + if (!text || !text.trim()) return []; + const payload = { model: config.embedModel, input: text }; + const data = await withRetry(() => postJson('/embeddings', payload)); + return data?.data?.[0]?.embedding || []; } export async function summarizeConversation(summarySoFar, transcriptChunk) { - const system = { - role: 'system', - content: 'You compress Discord chats. Keep tone casual, capture facts, goals, and emotional state. Max 120 words.' - }; + const system = { role: 'system', content: 'You compress Discord chats. Keep tone casual, capture facts, goals, and emotional state. Max 120 words.' 
}; const prompt = `Existing summary (can be empty): ${summarySoFar || 'None'}\nNew messages:\n${transcriptChunk}`; const user = { role: 'user', content: prompt }; return chatCompletion([system, user], { temperature: 0.4, maxTokens: 180 });