This commit is contained in:
Luna
2026-03-03 20:44:52 +01:00
parent 65de299320
commit 931f222979
4 changed files with 11 additions and 4 deletions

View File

@@ -11,6 +11,10 @@ CODER_USER_ID=
ENABLE_DASHBOARD=false
# port for the dashboard if enabled
DASHBOARD_PORT=3000
# Max short-term turns stored per user (default 12)
SHORT_TERM_LIMIT=12
# Set to false to stop continually summarizing the short-term buffer
ENABLE_SHORT_TERM_SUMMARY=true
ENABLE_WEB_SEARCH=true
OPENAI_API_KEY=your_openai_api_key
# Memory retrieval cooldown (ms) before the same long-term entry can be reused

View File

@@ -95,9 +95,9 @@ Nova is a friendly, slightly witty Discord companion that chats naturally in DMs
CHANGELOG.md
```
- **Short-term (recency buffer):** Last 10 conversation turns kept verbatim for style and continuity. Stored per user inside `data/memory.sqlite`.
- **Short-term (recency buffer):** The most recent conversation turns are kept verbatim for style and continuity. `SHORT_TERM_LIMIT` (default 12) controls how many of those turns persist, and you can lower it if you prefer a tighter buffer.
- **Long-term (vector store):** Every user message + bot reply pair becomes an embedding via `text-embedding-3-small`. Embeddings, raw text, timestamps, and heuristic importance scores live in the same SQLite file. Retrieval uses cosine similarity plus a small importance boost; top 5 results feed the prompt.
- **Summary layer:** When the recency buffer grows past ~3000 characters, Nova asks OpenAI to condense the transcript to <120 words, keeps the summary, and trims the raw buffer down to the last few turns. This keeps token usage low while retaining story arcs.
- **Summary layer:** When the recency buffer grows past ~3000 characters, Nova asks OpenAI to condense the transcript to <120 words, keeps the summary, and trims the raw buffer down to the last few turns. This keeps token usage low while retaining story arcs, but you can disable it with `ENABLE_SHORT_TERM_SUMMARY=false` if you want the raw buffer to stay intact.
- **Importance scoring:** Messages mentioning intent words ("plan", "remember", etc.), showing length, or emotional weight receive higher scores. When the store exceeds its cap, the lowest-importance/oldest memories are pruned. You can also call `pruneLowImportanceMemories()` manually if needed.
- **Embedding math:** `text-embedding-3-small` returns 1,536 floating-point numbers for each text chunk. That giant array is a vector map of the message's meaning; similar moments land near each other in 1,536-dimensional space.

View File

@@ -29,13 +29,14 @@ export const config = {
maxCoderPingIntervalMs: 6 * 60 * 60 * 1000,
coderPingMinIntervalMs: process.env.CODER_PING_MIN_MS ? parseInt(process.env.CODER_PING_MIN_MS, 10) : 6 * 60 * 60 * 1000,
coderPingMaxIntervalMs: process.env.CODER_PING_MAX_MS ? parseInt(process.env.CODER_PING_MAX_MS, 10) : 4.5 * 60 * 60 * 1000,
shortTermLimit: 6,
shortTermLimit: process.env.SHORT_TERM_LIMIT ? parseInt(process.env.SHORT_TERM_LIMIT, 10) : 12,
memoryDbFile: process.env.MEMORY_DB_FILE ? path.resolve(process.env.MEMORY_DB_FILE) : defaultMemoryDbFile,
legacyMemoryFile,
summaryTriggerChars: 2200,
memoryPruneThreshold: 0.2,
memoryCooldownMs: process.env.MEMORY_COOLDOWN_MS ? parseInt(process.env.MEMORY_COOLDOWN_MS, 10) : 3 * 60 * 1000,
maxMemories: 8000,
enableShortTermSummary: process.env.ENABLE_SHORT_TERM_SUMMARY !== 'false',
relevantMemoryCount: 3,
longTermFetchLimit: 120,
// Optional local dashboard that runs alongside the bot. Enable with

View File

@@ -379,7 +379,9 @@ export async function appendShortTerm(userId, role, content) {
Date.now(),
]);
enforceShortTermCap(db, userId);
await maybeSummarize(db, userId);
if (config.enableShortTermSummary) {
await maybeSummarize(db, userId);
}
await persistDb(db);
}