updated to cue memory now.
@@ -15,8 +15,12 @@ DASHBOARD_PORT=3000
 SHORT_TERM_LIMIT=12
 # Set to false to stop continually summarizing the short-term buffer
 ENABLE_SHORT_TERM_SUMMARY=true
+# Number of short-term turns before an automatic summary request (default 12)
+SUMMARY_TRIGGER_TURNS=12
 ENABLE_WEB_SEARCH=true
 OPENAI_API_KEY=your_openai_api_key
 # Memory retrieval cooldown (ms) before the same long-term entry can be reused
 MEMORY_COOLDOWN_MS=180000
+# Soft memory recall requires the retrieved score to meet this threshold
+MEMORY_RECALL_SIMILARITY_THRESHOLD=0.62
@@ -94,13 +94,13 @@ Nova is a friendly, slightly witty Discord companion that chats naturally in DMs
 README.md
 CHANGELOG.md
 ```
-- **Short-term (recency buffer):** Last turns kept verbatim for style and continuity. `SHORT_TERM_LIMIT` (default 12) controls how many of those turns persist, and you can lower it further if you prefer tighter buffers.
+- **Short-term (recency buffer):** Last turns kept verbatim for style and continuity. `SHORT_TERM_LIMIT` (default 12) controls how many of those turns persist, and you can lower it further if you prefer tighter buffers. Nova only auto-summarizes the buffer once the transcript crosses `SUMMARY_TRIGGER_TURNS` or `summaryTriggerChars`, so the raw text stays available for regular chat while a concise recap is generated every dozen turns to keep token usage manageable.
 - **Long-term (vector store):** Every user message + bot reply pair becomes an embedding via `text-embedding-3-small`. Embeddings, raw text, timestamps, and heuristic importance scores live in the same SQLite file. Retrieval uses cosine similarity plus a small importance boost; top 5 results feed the prompt.
-- **Summary layer:** When the recency buffer grows past ~3000 characters, Nova asks OpenAI to condense the transcript to <120 words, keeps the summary, and trims the raw buffer down to the last few turns. This keeps token usage low while retaining story arcs, but you can disable it with `ENABLE_SHORT_TERM_SUMMARY=false` if you want the raw buffer to stay intact.
+- **Summary layer:** When the recency buffer grows past `summaryTriggerChars` (2200 characters by default) or `SUMMARY_TRIGGER_TURNS` turns, Nova asks OpenAI to condense the transcript to <120 words, keeps the summary, and trims the raw buffer down to the last few turns. This keeps token usage low while retaining story arcs, but you can disable it with `ENABLE_SHORT_TERM_SUMMARY=false` if you want the raw buffer to stay intact.
 - **Importance scoring:** Messages that mention intent words ("plan", "remember", etc.), run long, or carry emotional weight receive higher scores. When the store exceeds its cap, the lowest-importance/oldest memories are pruned. You can also call `pruneLowImportanceMemories()` manually if needed.

-- **Embedding math:** `text-embedding-3-small` returns 1,536 floating-point numbers for each text chunk. That giant array is a vector map of the message’s meaning; similar moments land near each other in 1,536-dimensional space.
+- **Pattern-aware long-term recall:** Long-term memory is only queried when Nova detects a recall cue (`remember`, `do you know`, `we talked`, `refresh my memory`, etc.). When a cue fires, she fetches the top cosine-similar memories but only keeps the ones whose score meets `MEMORY_RECALL_SIMILARITY_THRESHOLD` (default 0.62); otherwise the conversation stays anchored on the short-term buffer and summary. This keeps memory-driven context from popping up during casual chat unless you explicitly ask for it.
 - **What gets embedded:** After every user→bot turn, `recordInteraction()` (see [src/memory.js](src/memory.js)) bundles the pair, scores its importance, asks OpenAI for an embedding, and stores `{ content, embedding, importance, timestamp }` inside the SQLite tables.
 - **Why so many numbers:** Cosine similarity needs raw vectors to compare new thoughts to past ones. When a fresh message arrives, `retrieveRelevantMemories()` embeds it too, calculates cosine similarity against every stored vector, adds a small importance boost, and returns the top five memories to inject into the system prompt.
 - **Memory cooldown:** `MEMORY_COOLDOWN_MS` (defaults to 180000 ms) keeps a long-term memory out of the retrieval window for a few minutes after it was just used so Nova has to pull fresh context before repeating herself, while still falling back automatically if there isn’t anything new to surface.
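The retrieval math in the bullets above (cosine similarity plus a small importance boost, keeping only the top few results) can be sketched as follows; `rankMemories` and its `topK` parameter are illustrative names, not the bot's actual code:

```javascript
// Cosine similarity between two embedding vectors of equal length.
function cosineSimilarity(a, b) {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i += 1) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

// Hypothetical ranking step: similarity plus a 0.1x importance boost,
// sorted descending and capped at topK.
function rankMemories(queryEmbedding, memories, topK = 5) {
  return memories
    .map((m) => ({
      ...m,
      score: cosineSimilarity(queryEmbedding, m.embedding) + m.importance * 0.1,
    }))
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);
}
```

The real vectors are 1,536-dimensional, but the same arithmetic applies at any length.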
src/bot.js
@@ -22,9 +22,15 @@ let coderPingTimer;
 const continuationState = new Map();
 let isSleeping = false;

+const recallPatterns = config.memoryRecallTriggerPatterns || [];
 const contextCache = new Map();
 const CONTEXT_CACHE_TTL_MS = 2 * 60 * 1000;

+function matchesMemoryRecallCue(text) {
+  if (!text) return false;
+  return recallPatterns.some((pattern) => pattern.test(text));
+}
+
 const cloneShortTerm = (entries = []) => entries.map((entry) => ({ ...entry }));
 const cloneMemories = (entries = []) => entries.map((entry) => ({ ...entry }));
@@ -203,6 +209,7 @@ function startContinuationForUser(userId, channel) {
   const { messages, debug } = await buildPrompt(userId, incomingText, {
     context: cachedContext,
     userName: cachedContext?.userName || null,
+    includeMemories: false,
   });
   cacheContext(userId, debug.context);
   const reply = await chatCompletion(messages, { temperature: 0.7, maxTokens: 200 });
@@ -687,6 +694,7 @@ client.on('messageCreate', async (message) => {
     return;
   }

+  const recallTrigger = matchesMemoryRecallCue(cleaned);
   const intelMeta = (await maybeFetchLiveIntel(userId, cleaned)) || {
     liveIntel: null,
     blockedSearchTerm: null,
@@ -696,6 +704,8 @@ client.on('messageCreate', async (message) => {
     liveIntel: intelMeta.liveIntel,
     blockedSearchTerm: intelMeta.blockedSearchTerm,
     searchOutage: intelMeta.searchOutage,
+    includeMemories: recallTrigger,
+    similarityThreshold: config.memoryRecallSimilarityThreshold,
     userName: message.member?.displayName || message.author.username,
   });
   cacheContext(userId, debug.context);
@@ -7,6 +7,16 @@ dotenv.config();
 const defaultMemoryDbFile = fileURLToPath(new URL('../data/memory.sqlite', import.meta.url));
 const legacyMemoryFile = fileURLToPath(new URL('../data/memory.json', import.meta.url));

+const memoryRecallTriggerPatterns = [
+  /remember( when| that)?/i,
+  /do you know( about)?/i,
+  /do you remember/i,
+  /\bwe talked\b/i,
+  /\brecall\b/i,
+  /\bremind\b/i,
+  /\brefresh my memory\b/i,
+];
+
 const requiredEnv = ['DISCORD_TOKEN'];
 requiredEnv.forEach((key) => {
   if (!process.env[key]) {
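The cue list above drives `matchesMemoryRecallCue` in src/bot.js. A quick standalone sketch of how the patterns behave (the list is copied from the diff; the helper mirrors the bot's one-liner):

```javascript
// Same cue list as in the config; matching any pattern flags a recall request.
const memoryRecallTriggerPatterns = [
  /remember( when| that)?/i,
  /do you know( about)?/i,
  /do you remember/i,
  /\bwe talked\b/i,
  /\brecall\b/i,
  /\bremind\b/i,
  /\brefresh my memory\b/i,
];

// Empty or missing text never triggers recall.
const matchesMemoryRecallCue = (text) =>
  Boolean(text) && memoryRecallTriggerPatterns.some((pattern) => pattern.test(text));
```

Note that the first three patterns have no `\b` word boundaries, so they match anywhere in the message, while the last four only match whole words.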
@@ -33,10 +43,15 @@ export const config = {
   memoryDbFile: process.env.MEMORY_DB_FILE ? path.resolve(process.env.MEMORY_DB_FILE) : defaultMemoryDbFile,
   legacyMemoryFile,
   summaryTriggerChars: 2200,
+  summaryTriggerTurns: process.env.SUMMARY_TRIGGER_TURNS ? parseInt(process.env.SUMMARY_TRIGGER_TURNS, 10) : 12,
   memoryPruneThreshold: 0.2,
   memoryCooldownMs: process.env.MEMORY_COOLDOWN_MS ? parseInt(process.env.MEMORY_COOLDOWN_MS, 10) : 3 * 60 * 1000,
   maxMemories: 8000,
   enableShortTermSummary: process.env.ENABLE_SHORT_TERM_SUMMARY !== 'false',
+  memoryRecallSimilarityThreshold: process.env.MEMORY_RECALL_SIMILARITY_THRESHOLD
+    ? parseFloat(process.env.MEMORY_RECALL_SIMILARITY_THRESHOLD)
+    : 0.62,
+  memoryRecallTriggerPatterns,
   relevantMemoryCount: 3,
   longTermFetchLimit: 120,
   // Optional local dashboard that runs alongside the bot. Enable with
@@ -242,7 +242,10 @@ const fullShortTerm = (db, userId) =>
 const maybeSummarize = async (db, userId) => {
   const shortTermEntries = fullShortTerm(db, userId);
   const charCount = shortTermEntries.reduce((sum, msg) => sum + (msg.content?.length || 0), 0);
-  if (charCount < config.summaryTriggerChars || shortTermEntries.length < config.shortTermLimit) {
+  if (
+    charCount < config.summaryTriggerChars &&
+    shortTermEntries.length < (config.summaryTriggerTurns || config.shortTermLimit)
+  ) {
     return false;
   }
   const userRow = get(db, 'SELECT summary FROM users WHERE id = ?', [userId]) || { summary: '' };
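The `||` → `&&` change in the early return above flips the trigger semantics: the old code summarized only when both thresholds were crossed, the new code summarizes as soon as either one is. A minimal sketch of the new condition, with the hypothetical helper name `shouldSummarize` and the defaults from the config:

```javascript
// Returns true once EITHER the character count OR the turn count
// crosses its threshold (the early-return condition, negated).
const shouldSummarize = (
  charCount,
  turnCount,
  { summaryTriggerChars = 2200, summaryTriggerTurns = 12 } = {}
) => !(charCount < summaryTriggerChars && turnCount < summaryTriggerTurns);
```

This is why the README's summary layer can now fire on turn count alone even when the buffer is short in characters.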
@@ -327,7 +330,7 @@ const retrieveRelevantMemories = async (db, userId, query, options = {}) => {
     return [];
   }
   const limit = config.longTermFetchLimit || 200;
-  const { includeAllUsers = false } = options;
+  const { includeAllUsers = false, minScore = Number.NEGATIVE_INFINITY } = options;
   const params = [];
   const whereClause = includeAllUsers ? '' : ' WHERE user_id = ?';
   if (!includeAllUsers) {
@@ -344,7 +347,7 @@ const retrieveRelevantMemories = async (db, userId, query, options = {}) => {
   }
   const now = Date.now();
   const cooldown = config.memoryCooldownMs || 0;
-  const usage = memoryUsageMap.get(userId);
+  const usage = getMemoryUsageMapForUser(userId);
   const eligibleRows =
     cooldown && usage
       ? rows.filter((entry) => now - (usage.get(entry.id) || 0) > cooldown)
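The `eligibleRows` filter above implements the `MEMORY_COOLDOWN_MS` behavior from the README: a memory is skipped while its last-use timestamp is within the cooldown window. A self-contained sketch of that gate (the standalone `eligibleRows` function here is illustrative; in the diff it is an inline expression):

```javascript
// A row is eligible only if it was last used more than cooldownMs ago,
// or never used at all (missing entries default to timestamp 0).
const eligibleRows = (rows, usage, now, cooldownMs) =>
  cooldownMs && usage
    ? rows.filter((entry) => now - (usage.get(entry.id) || 0) > cooldownMs)
    : rows;
```

With the default 180000 ms cooldown, a memory surfaced at time T stays out of retrieval until T + 3 minutes.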
@@ -360,13 +363,13 @@ const retrieveRelevantMemories = async (db, userId, query, options = {}) => {
         score: cosineSimilarity(queryEmbedding, embedding) + entry.importance * 0.1,
       };
     })
-    .sort((a, b) => b.score - a.score)
-    .slice(0, config.relevantMemoryCount);
-  if (scored.length) {
-    const usageMap = getMemoryUsageMapForUser(userId);
-    scored.forEach((entry) => usageMap.set(entry.id, now));
+    .sort((a, b) => b.score - a.score);
+  const filtered = scored.filter((entry) => entry.score >= minScore);
+  const capped = filtered.slice(0, config.relevantMemoryCount);
+  if (capped.length) {
+    capped.forEach((entry) => usage.set(entry.id, now));
   }
-  return scored;
+  return capped;
 };

 export async function appendShortTerm(userId, role, content) {
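The hunk above splits the old sort-and-slice into three steps: sort, drop everything below `minScore`, then cap at `relevantMemoryCount`. A compact sketch of that selection pipeline (the name `selectMemories` is hypothetical):

```javascript
// Keep only memories at or above minScore, then cap the count.
// With minScore = -Infinity (the default), nothing is filtered out,
// which preserves the pre-change behavior.
const selectMemories = (scored, minScore, cap = 3) =>
  scored
    .filter((entry) => entry.score >= minScore)
    .sort((a, b) => b.score - a.score)
    .slice(0, cap);
```

Note the diff also fixes a subtle bookkeeping point: only the memories actually returned (`capped`) get their usage timestamps updated, instead of every scored candidate.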
@@ -390,8 +393,18 @@ export async function prepareContext(userId, incomingMessage, options = {}) {
   ensureUser(db, userId);
   const userRow = get(db, 'SELECT summary FROM users WHERE id = ?', [userId]) || { summary: '' };
   const shortTerm = getShortTermHistory(db, userId, config.shortTermLimit);
-  const { includeAllUsers = false } = options;
-  const memories = await retrieveRelevantMemories(db, userId, incomingMessage, { includeAllUsers });
+  const {
+    includeAllUsers = false,
+    includeLongTerm = true,
+    memorySimilarityThreshold = Number.NEGATIVE_INFINITY,
+  } = options;
+  const memories =
+    includeLongTerm && incomingMessage?.trim()
+      ? await retrieveRelevantMemories(db, userId, incomingMessage, {
+          includeAllUsers,
+          minScore: memorySimilarityThreshold,
+        })
+      : [];
   return {
     shortTerm,
     summary: userRow.summary || '',
@@ -95,9 +95,21 @@ export async function buildPrompt(userId, incomingText, options = {}) {
     context: providedContext = null,
     useGlobalMemories = false,
     userName = null,
+    includeMemories = false,
+    similarityThreshold = null,
   } = options;
   const context =
-    providedContext || (await prepareContext(userId, incomingText, { includeAllUsers: useGlobalMemories }));
+    providedContext ||
+    (await prepareContext(userId, incomingText, {
+      includeAllUsers: useGlobalMemories,
+      includeLongTerm: includeMemories || useGlobalMemories,
+      memorySimilarityThreshold:
+        includeMemories && similarityThreshold !== null
+          ? similarityThreshold
+          : includeMemories
+            ? config.memoryRecallSimilarityThreshold
+            : Number.NEGATIVE_INFINITY,
+    }));
   if (userName) {
     context.userName = userName;
   } else if (context.userName === undefined) {
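The nested ternary in the hunk above resolves the effective similarity floor: an explicit per-call `similarityThreshold` wins, otherwise the configured default applies when recall is requested, and without a recall cue there is no floor at all. Extracted as a hypothetical standalone helper for clarity:

```javascript
// Mirrors the diff's threshold resolution:
// explicit value > configured default (0.62) > no floor.
const resolveThreshold = (includeMemories, similarityThreshold, configDefault = 0.62) =>
  includeMemories && similarityThreshold !== null
    ? similarityThreshold
    : includeMemories
      ? configDefault
      : Number.NEGATIVE_INFINITY;
```

The `Number.NEGATIVE_INFINITY` branch matters because `includeLongTerm` can still be true via `useGlobalMemories`, and in that case no score filtering should occur.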