This commit is contained in:
Luna
2026-03-03 20:44:52 +01:00
parent 65de299320
commit 931f222979
4 changed files with 11 additions and 4 deletions

View File

@@ -11,6 +11,10 @@ CODER_USER_ID=
ENABLE_DASHBOARD=false
# port for the dashboard if enabled
DASHBOARD_PORT=3000
# Max short-term turns stored per user (default 12)
SHORT_TERM_LIMIT=12
# Set to false to stop continually summarizing the short-term buffer
ENABLE_SHORT_TERM_SUMMARY=true
ENABLE_WEB_SEARCH=true
OPENAI_API_KEY=your_openai_api_key
# Memory retrieval cooldown (ms) before the same long-term entry can be reused

View File

@@ -95,9 +95,9 @@ Nova is a friendly, slightly witty Discord companion that chats naturally in DMs
CHANGELOG.md
```
- **Short-term (recency buffer):** Last 10 conversation turns kept verbatim for style and continuity. Stored per user inside `data/memory.sqlite`.
- **Short-term (recency buffer):** The most recent conversation turns are kept verbatim for style and continuity. `SHORT_TERM_LIMIT` (default 12) controls how many of those turns persist, and you can lower it if you prefer a tighter buffer.
- **Long-term (vector store):** Every user message + bot reply pair becomes an embedding via `text-embedding-3-small`. Embeddings, raw text, timestamps, and heuristic importance scores live in the same SQLite file. Retrieval uses cosine similarity plus a small importance boost; top 5 results feed the prompt.
- **Summary layer:** When the recency buffer grows past ~3000 characters, Nova asks OpenAI to condense the transcript to <120 words, keeps the summary, and trims the raw buffer down to the last few turns. This keeps token usage low while retaining story arcs.
- **Summary layer:** When the recency buffer grows past ~3000 characters, Nova asks OpenAI to condense the transcript to <120 words, keeps the summary, and trims the raw buffer down to the last few turns. This keeps token usage low while retaining story arcs, but you can disable it with `ENABLE_SHORT_TERM_SUMMARY=false` if you want the raw buffer to stay intact.
- **Importance scoring:** Messages mentioning intent words ("plan", "remember", etc.), showing length, or emotional weight receive higher scores. When the store exceeds its cap, the lowest-importance/oldest memories are pruned. You can also call `pruneLowImportanceMemories()` manually if needed.
- **Embedding math:** `text-embedding-3-small` returns 1,536 floating-point numbers for each text chunk. That giant array is a vector map of the message's meaning; similar moments land near each other in 1,536-dimensional space.

View File

@@ -29,13 +29,14 @@ export const config = {
maxCoderPingIntervalMs: 6 * 60 * 60 * 1000,
coderPingMinIntervalMs: process.env.CODER_PING_MIN_MS ? parseInt(process.env.CODER_PING_MIN_MS, 10) : 6 * 60 * 60 * 1000,
coderPingMaxIntervalMs: process.env.CODER_PING_MAX_MS ? parseInt(process.env.CODER_PING_MAX_MS, 10) : 4.5 * 60 * 60 * 1000,
shortTermLimit: 6,
shortTermLimit: process.env.SHORT_TERM_LIMIT ? parseInt(process.env.SHORT_TERM_LIMIT, 10) : 12,
memoryDbFile: process.env.MEMORY_DB_FILE ? path.resolve(process.env.MEMORY_DB_FILE) : defaultMemoryDbFile,
legacyMemoryFile,
summaryTriggerChars: 2200,
memoryPruneThreshold: 0.2,
memoryCooldownMs: process.env.MEMORY_COOLDOWN_MS ? parseInt(process.env.MEMORY_COOLDOWN_MS, 10) : 3 * 60 * 1000,
maxMemories: 8000,
enableShortTermSummary: process.env.ENABLE_SHORT_TERM_SUMMARY !== 'false',
relevantMemoryCount: 3,
longTermFetchLimit: 120,
// Optional local dashboard that runs alongside the bot. Enable with

View File

@@ -379,7 +379,9 @@ export async function appendShortTerm(userId, role, content) {
Date.now(),
]);
enforceShortTermCap(db, userId);
await maybeSummarize(db, userId);
if (config.enableShortTermSummary) {
await maybeSummarize(db, userId);
}
await persistDb(db);
}