From 68d7dd747f68cd12237f21de9a8086a09569f962 Mon Sep 17 00:00:00 2001 From: Luna Date: Tue, 3 Mar 2026 21:20:38 +0100 Subject: [PATCH] Gate long-term memory recall behind explicit cue phrases --- .env.example | 4 ++++ README.md | 7 +++---- src/bot.js | 10 ++++++++++ src/config.js | 15 +++++++++++++++ src/memory.js | 35 ++++++++++++++++++++++++----------- src/prompt.js | 14 +++++++++++++- 6 files changed, 69 insertions(+), 16 deletions(-) diff --git a/.env.example b/.env.example index d57b9df..417f2e4 100644 --- a/.env.example +++ b/.env.example @@ -15,8 +15,12 @@ DASHBOARD_PORT=3000 SHORT_TERM_LIMIT=12 # Set to false to stop continually summarizing the short-term buffer ENABLE_SHORT_TERM_SUMMARY=true +# Number of short-term turns before an automatic summary request (default 12) +SUMMARY_TRIGGER_TURNS=12 ENABLE_WEB_SEARCH=true OPENAI_API_KEY=your_openai_api_key # Memory retrieval cooldown (ms) before the same long-term entry can be reused MEMORY_COOLDOWN_MS=180000 +# Soft memory recall requires that the retrieved score meet this threshold +MEMORY_RECALL_SIMILARITY_THRESHOLD=0.62 diff --git a/README.md b/README.md index 6868bae..8cd0caf 100644 --- a/README.md +++ b/README.md @@ -94,13 +94,12 @@ Nova is a friendly, slightly witty Discord companion that chats naturally in DMs README.md CHANGELOG.md ``` - - - **Short-term (recency buffer):** Last turns kept verbatim for style and continuity. `SHORT_TERM_LIMIT` (default 12) controls how many of those turns persist, and you can lower it further if you prefer tighter buffers. + - **Short-term (recency buffer):** Last turns kept verbatim for style and continuity. `SHORT_TERM_LIMIT` (default 12) controls how many of those turns persist, and you can lower it further if you prefer tighter buffers. Nova only auto-summarizes the buffer once the transcript crosses `SUMMARY_TRIGGER_TURNS` or `summaryTriggerChars`, so the raw text stays for regular chat while a concise recap is generated every dozen turns to keep token usage manageable. 
- **Long-term (vector store):** Every user message + bot reply pair becomes an embedding via `text-embedding-3-small`. Embeddings, raw text, timestamps, and heuristic importance scores live in the same SQLite file. Retrieval uses cosine similarity plus a small importance boost; top 5 results feed the prompt. - - **Summary layer:** When the recency buffer grows past ~3000 characters, Nova asks OpenAI to condense the transcript to <120 words, keeps the summary, and trims the raw buffer down to the last few turns. This keeps token usage low while retaining story arcs, but you can disable it with `ENABLE_SHORT_TERM_SUMMARY=false` if you want the raw buffer to stay intact. + - **Summary layer:** When the recency buffer grows past ~2200 characters (`summaryTriggerChars`) or reaches `SUMMARY_TRIGGER_TURNS` turns, Nova asks OpenAI to condense the transcript to <120 words, keeps the summary, and trims the raw buffer down to the last few turns. This keeps token usage low while retaining story arcs, but you can disable it with `ENABLE_SHORT_TERM_SUMMARY=false` if you want the raw buffer to stay intact. - **Importance scoring:** Messages mentioning intent words ("plan", "remember", etc.), showing length, or emotional weight receive higher scores. When the store exceeds its cap, the lowest-importance/oldest memories are pruned. You can also call `pruneLowImportanceMemories()` manually if needed. - - **Embedding math:** `text-embedding-3-small` returns 1,536 floating-point numbers for each text chunk. That giant array is a vector map of the message’s meaning; similar moments land near each other in 1,536-dimensional space. + - **Pattern-aware long-term recall:** Long-term memory is only queried when Nova detects a recall cue (`remember`, `do you know`, `we talked`, `refresh my memory`, etc.). 
When a cue fires, she fetches the top cosine-similar memories but only keeps the ones whose score meets `MEMORY_RECALL_SIMILARITY_THRESHOLD` (default 0.62); otherwise the conversation stays anchored on the short-term buffer and summary. This keeps memory-driven context from popping up during casual chat unless you explicitly ask for it. - **What gets embedded:** After every user→bot turn, `recordInteraction()` (see [src/memory.js](src/memory.js)) bundles the pair, scores its importance, asks OpenAI for an embedding, and stores `{ content, embedding, importance, timestamp }` inside the SQLite tables. - **Why so many numbers:** Cosine similarity needs raw vectors to compare new thoughts to past ones. When a fresh message arrives, `retrieveRelevantMemories()` embeds it too, calculates cosine similarity against every stored vector, adds a small importance boost, and returns the top five memories to inject into the system prompt. - **Memory cooldown:** `MEMORY_COOLDOWN_MS` (defaults to 180000 ms) keeps a long-term memory out of the retrieval window for a few minutes after it was just used so Nova has to pull fresh context before repeating herself, while still falling back automatically if there isn’t anything new to surface. 
diff --git a/src/bot.js b/src/bot.js index feba649..92994fd 100644 --- a/src/bot.js +++ b/src/bot.js @@ -22,9 +22,15 @@ let coderPingTimer; const continuationState = new Map(); let isSleeping = false; +const recallPatterns = config.memoryRecallTriggerPatterns || []; const contextCache = new Map(); const CONTEXT_CACHE_TTL_MS = 2 * 60 * 1000; +function matchesMemoryRecallCue(text) { + if (!text) return false; + return recallPatterns.some((pattern) => pattern.test(text)); +} + const cloneShortTerm = (entries = []) => entries.map((entry) => ({ ...entry })); const cloneMemories = (entries = []) => entries.map((entry) => ({ ...entry })); @@ -203,6 +209,7 @@ function startContinuationForUser(userId, channel) { const { messages, debug } = await buildPrompt(userId, incomingText, { context: cachedContext, userName: cachedContext?.userName || null, + includeMemories: false, }); cacheContext(userId, debug.context); const reply = await chatCompletion(messages, { temperature: 0.7, maxTokens: 200 }); @@ -687,6 +694,7 @@ client.on('messageCreate', async (message) => { return; } + const recallTrigger = matchesMemoryRecallCue(cleaned); const intelMeta = (await maybeFetchLiveIntel(userId, cleaned)) || { liveIntel: null, blockedSearchTerm: null, @@ -696,6 +704,8 @@ client.on('messageCreate', async (message) => { liveIntel: intelMeta.liveIntel, blockedSearchTerm: intelMeta.blockedSearchTerm, searchOutage: intelMeta.searchOutage, + includeMemories: recallTrigger, + similarityThreshold: config.memoryRecallSimilarityThreshold, userName: message.member?.displayName || message.author.username, }); cacheContext(userId, debug.context); diff --git a/src/config.js b/src/config.js index 77f4b3b..28811c5 100644 --- a/src/config.js +++ b/src/config.js @@ -7,6 +7,16 @@ dotenv.config(); const defaultMemoryDbFile = fileURLToPath(new URL('../data/memory.sqlite', import.meta.url)); const legacyMemoryFile = fileURLToPath(new URL('../data/memory.json', import.meta.url)); +const 
memoryRecallTriggerPatterns = [ + /remember( when| that)?/i, + /do you know( about)?/i, + /do you remember/i, + /\bwe talked\b/i, + /\brecall\b/i, + /\bremind\b/i, + /\brefresh my memory\b/i, +]; + const requiredEnv = ['DISCORD_TOKEN']; requiredEnv.forEach((key) => { if (!process.env[key]) { @@ -33,10 +43,15 @@ export const config = { memoryDbFile: process.env.MEMORY_DB_FILE ? path.resolve(process.env.MEMORY_DB_FILE) : defaultMemoryDbFile, legacyMemoryFile, summaryTriggerChars: 2200, + summaryTriggerTurns: process.env.SUMMARY_TRIGGER_TURNS ? parseInt(process.env.SUMMARY_TRIGGER_TURNS, 10) : 12, memoryPruneThreshold: 0.2, memoryCooldownMs: process.env.MEMORY_COOLDOWN_MS ? parseInt(process.env.MEMORY_COOLDOWN_MS, 10) : 3 * 60 * 1000, maxMemories: 8000, enableShortTermSummary: process.env.ENABLE_SHORT_TERM_SUMMARY !== 'false', + memoryRecallSimilarityThreshold: process.env.MEMORY_RECALL_SIMILARITY_THRESHOLD + ? parseFloat(process.env.MEMORY_RECALL_SIMILARITY_THRESHOLD) + : 0.62, + memoryRecallTriggerPatterns, relevantMemoryCount: 3, longTermFetchLimit: 120, // Optional local dashboard that runs alongside the bot. 
Enable with diff --git a/src/memory.js b/src/memory.js index ce30146..e8d23eb 100644 --- a/src/memory.js +++ b/src/memory.js @@ -242,7 +242,10 @@ const fullShortTerm = (db, userId) => const maybeSummarize = async (db, userId) => { const shortTermEntries = fullShortTerm(db, userId); const charCount = shortTermEntries.reduce((sum, msg) => sum + (msg.content?.length || 0), 0); - if (charCount < config.summaryTriggerChars || shortTermEntries.length < config.shortTermLimit) { + if ( + charCount < config.summaryTriggerChars && + shortTermEntries.length < (config.summaryTriggerTurns || config.shortTermLimit) + ) { return false; } const userRow = get(db, 'SELECT summary FROM users WHERE id = ?', [userId]) || { summary: '' }; @@ -327,7 +330,7 @@ const retrieveRelevantMemories = async (db, userId, query, options = {}) => { return []; } const limit = config.longTermFetchLimit || 200; - const { includeAllUsers = false } = options; + const { includeAllUsers = false, minScore = Number.NEGATIVE_INFINITY } = options; const params = []; const whereClause = includeAllUsers ? '' : ' WHERE user_id = ?'; if (!includeAllUsers) { @@ -344,7 +347,7 @@ const retrieveRelevantMemories = async (db, userId, query, options = {}) => { } const now = Date.now(); const cooldown = config.memoryCooldownMs || 0; - const usage = memoryUsageMap.get(userId); + const usage = getMemoryUsageMapForUser(userId); const eligibleRows = cooldown && usage ? 
rows.filter((entry) => now - (usage.get(entry.id) || 0) > cooldown) @@ -360,13 +363,13 @@ const retrieveRelevantMemories = async (db, userId, query, options = {}) => { score: cosineSimilarity(queryEmbedding, embedding) + entry.importance * 0.1, }; }) - .sort((a, b) => b.score - a.score) - .slice(0, config.relevantMemoryCount); - if (scored.length) { - const usageMap = getMemoryUsageMapForUser(userId); - scored.forEach((entry) => usageMap.set(entry.id, now)); + .sort((a, b) => b.score - a.score); + const filtered = scored.filter((entry) => entry.score >= minScore); + const capped = filtered.slice(0, config.relevantMemoryCount); + if (capped.length) { + capped.forEach((entry) => usage.set(entry.id, now)); } - return scored; + return capped; }; export async function appendShortTerm(userId, role, content) { @@ -390,8 +393,18 @@ export async function prepareContext(userId, incomingMessage, options = {}) { ensureUser(db, userId); const userRow = get(db, 'SELECT summary FROM users WHERE id = ?', [userId]) || { summary: '' }; const shortTerm = getShortTermHistory(db, userId, config.shortTermLimit); - const { includeAllUsers = false } = options; - const memories = await retrieveRelevantMemories(db, userId, incomingMessage, { includeAllUsers }); + const { + includeAllUsers = false, + includeLongTerm = true, + memorySimilarityThreshold = Number.NEGATIVE_INFINITY, + } = options; + const memories = + includeLongTerm && incomingMessage?.trim() + ? 
await retrieveRelevantMemories(db, userId, incomingMessage, { + includeAllUsers, + minScore: memorySimilarityThreshold, + }) + : []; return { shortTerm, summary: userRow.summary || '', diff --git a/src/prompt.js b/src/prompt.js index cae6574..10f9c88 100644 --- a/src/prompt.js +++ b/src/prompt.js @@ -95,9 +95,21 @@ export async function buildPrompt(userId, incomingText, options = {}) { context: providedContext = null, useGlobalMemories = false, userName = null, + includeMemories = false, + similarityThreshold = null, } = options; const context = - providedContext || (await prepareContext(userId, incomingText, { includeAllUsers: useGlobalMemories })); + providedContext || + (await prepareContext(userId, incomingText, { + includeAllUsers: useGlobalMemories, + includeLongTerm: includeMemories || useGlobalMemories, + memorySimilarityThreshold: + includeMemories && similarityThreshold !== null + ? similarityThreshold + : includeMemories + ? config.memoryRecallSimilarityThreshold + : Number.NEGATIVE_INFINITY, + })); if (userName) { context.userName = userName; } else if (context.userName === undefined) {