diff --git a/.env.example b/.env.example index f173303..d57b9df 100644 --- a/.env.example +++ b/.env.example @@ -11,6 +11,10 @@ CODER_USER_ID= ENABLE_DASHBOARD=false # port for the dashboard if enabled DASHBOARD_PORT=3000 +# Max short-term turns stored per user (default 12) +SHORT_TERM_LIMIT=12 +# Set to false to disable ongoing summarization of the short-term buffer +ENABLE_SHORT_TERM_SUMMARY=true ENABLE_WEB_SEARCH=true OPENAI_API_KEY=your_openai_api_key # Memory retrieval cooldown (ms) before the same long-term entry can be reused diff --git a/README.md b/README.md index 169cb54..6868bae 100644 --- a/README.md +++ b/README.md @@ -95,9 +95,9 @@ Nova is a friendly, slightly witty Discord companion that chats naturally in DMs CHANGELOG.md ``` - - **Short-term (recency buffer):** Last 10 conversation turns kept verbatim for style and continuity. Stored per user inside `data/memory.sqlite`. + - **Short-term (recency buffer):** The most recent conversation turns are kept verbatim for style and continuity, stored per user inside `data/memory.sqlite`. `SHORT_TERM_LIMIT` (default 12) controls how many of those turns persist, and you can lower it further if you prefer tighter buffers. - **Long-term (vector store):** Every user message + bot reply pair becomes an embedding via `text-embedding-3-small`. Embeddings, raw text, timestamps, and heuristic importance scores live in the same SQLite file. Retrieval uses cosine similarity plus a small importance boost; top 5 results feed the prompt. - - **Summary layer:** When the recency buffer grows past ~3000 characters, Nova asks OpenAI to condense the transcript to <120 words, keeps the summary, and trims the raw buffer down to the last few turns. + - **Summary layer:** When the recency buffer grows past ~2,200 characters (the `summaryTriggerChars` setting), Nova asks OpenAI to condense the transcript to <120 words, keeps the summary, and trims the raw buffer down to the last few turns. 
This keeps token usage low while retaining story arcs, but you can disable it with `ENABLE_SHORT_TERM_SUMMARY=false` if you want the raw buffer to stay intact. - **Importance scoring:** Messages mentioning intent words ("plan", "remember", etc.), showing length, or emotional weight receive higher scores. When the store exceeds its cap, the lowest-importance/oldest memories are pruned. You can also call `pruneLowImportanceMemories()` manually if needed. - **Embedding math:** `text-embedding-3-small` returns 1,536 floating-point numbers for each text chunk. That giant array is a vector map of the message’s meaning; similar moments land near each other in 1,536-dimensional space. diff --git a/src/config.js b/src/config.js index 29ebc59..77f4b3b 100644 --- a/src/config.js +++ b/src/config.js @@ -29,13 +29,14 @@ export const config = { maxCoderPingIntervalMs: 6 * 60 * 60 * 1000, coderPingMinIntervalMs: process.env.CODER_PING_MIN_MS ? parseInt(process.env.CODER_PING_MIN_MS, 10) : 6 * 60 * 60 * 1000, coderPingMaxIntervalMs: process.env.CODER_PING_MAX_MS ? parseInt(process.env.CODER_PING_MAX_MS, 10) : 4.5 * 60 * 60 * 1000, - shortTermLimit: 6, + shortTermLimit: process.env.SHORT_TERM_LIMIT ? parseInt(process.env.SHORT_TERM_LIMIT, 10) : 12, memoryDbFile: process.env.MEMORY_DB_FILE ? path.resolve(process.env.MEMORY_DB_FILE) : defaultMemoryDbFile, legacyMemoryFile, summaryTriggerChars: 2200, memoryPruneThreshold: 0.2, memoryCooldownMs: process.env.MEMORY_COOLDOWN_MS ? parseInt(process.env.MEMORY_COOLDOWN_MS, 10) : 3 * 60 * 1000, maxMemories: 8000, + enableShortTermSummary: process.env.ENABLE_SHORT_TERM_SUMMARY !== 'false', relevantMemoryCount: 3, longTermFetchLimit: 120, // Optional local dashboard that runs alongside the bot. 
Enable with diff --git a/src/memory.js b/src/memory.js index 1df3ae9..ce30146 100644 --- a/src/memory.js +++ b/src/memory.js @@ -379,7 +379,9 @@ export async function appendShortTerm(userId, role, content) { Date.now(), ]); enforceShortTermCap(db, userId); - await maybeSummarize(db, userId); + if (config.enableShortTermSummary) { + await maybeSummarize(db, userId); + } await persistDb(db); }