Add proxy-based search safeguards
This commit is contained in:
98
src/bot.js
98
src/bot.js
@@ -2,7 +2,7 @@ import { Client, GatewayIntentBits, Partials, ChannelType } from 'discord.js';
|
||||
import { config } from './config.js';
|
||||
import { chatCompletion } from './openai.js';
|
||||
import { appendShortTerm, prepareContext, recordInteraction } from './memory.js';
|
||||
import { searchWeb } from './search.js';
|
||||
import { searchWeb, appendSearchLog } from './search.js';
|
||||
|
||||
const client = new Client({
|
||||
intents: [
|
||||
@@ -65,6 +65,19 @@ const detailRegex = /(explain|how do i|tutorial|step by step|teach me|walk me th
|
||||
const splitHintRegex = /(split|multiple messages|two messages|keep talking|ramble|keep going)/i;
|
||||
const searchCueRegex = /(google|search|look up|latest|news|today|current|who won|price of|stock|weather|what happened)/i;
|
||||
|
||||
/**
 * Heuristic patterns for messages that try to talk the bot out of its system
 * prompt. They are deliberately conservative: every alternative is anchored on
 * word boundaries so that ordinary words ("freedom", "is it free?") do not
 * trip the refusal path.
 */
const instructionOverridePatterns = [
  // "ignore all previous instructions", "disregard your rules", ...
  // Any number of qualifiers may sit between the verb and the noun.
  /\b(?:ignore|disregard|forget|override)\s+(?:(?:all|any|the|your|my|our|previous|prior|earlier|above|system|these|those)\s+)*(?:instructions|rules|directives|prompts)\b/i,
  // "ignore the system prompt"
  /\b(?:ignore|forget)\s+(?:(?:the|your)\s+)?system prompt\b/i,
  // "you are now free" / "you're free" — "free" alone is far too common to match.
  /\byou(?:\s+are|'re)\s+(?:now\s+)?free\b/i,
  // Jailbreak vocabulary that is suspicious on its own.
  /\b(?:uncensored|jailbreak|jailbroken|no longer restricted)\b/i,
  // "act as if there are no rules" / "pretend as if no restrictions"
  /\b(?:act|pretend)\s+as if\s+(?:there\s+(?:are|were)\s+no rules|no restrictions)\b/i,
  // "bypass all filters"
  /\bbypass\s+(?:all\s+)?(?:the\s+)?(?:rules|safeguards|filters)\b/i,
];

/**
 * Reports whether a message looks like an attempt to override the bot's
 * instructions.
 *
 * @param {string | null | undefined} text - Cleaned message content.
 * @returns {boolean} `true` when any override pattern matches; `false` for
 *   empty or missing text.
 */
function isInstructionOverrideAttempt(text) {
  if (!text) return false;
  return instructionOverridePatterns.some((pattern) => pattern.test(text));
}
|
||||
|
||||
const lastSearchByUser = new Map();
|
||||
const SEARCH_COOLDOWN_MS = 60 * 1000;
|
||||
|
||||
@@ -79,16 +92,31 @@ async function maybeFetchLiveIntel(userId, text) {
|
||||
if (!wantsWebSearch(text)) return null;
|
||||
const last = lastSearchByUser.get(userId) || 0;
|
||||
if (Date.now() - last < SEARCH_COOLDOWN_MS) return null;
|
||||
const results = await searchWeb(text, 3);
|
||||
if (!results.length) return null;
|
||||
lastSearchByUser.set(userId, Date.now());
|
||||
const formatted = results
|
||||
.map((entry, idx) => `${idx + 1}. ${entry.title} (${entry.url}) — ${entry.snippet}`)
|
||||
.join('\n');
|
||||
return formatted;
|
||||
try {
|
||||
const { results, proxy } = await searchWeb(text, 3);
|
||||
if (!results.length) {
|
||||
lastSearchByUser.set(userId, Date.now());
|
||||
return { liveIntel: null, blockedSearchTerm: null, searchOutage: null };
|
||||
}
|
||||
lastSearchByUser.set(userId, Date.now());
|
||||
const formatted = results
|
||||
.map((entry, idx) => `${idx + 1}. ${entry.title} (${entry.url}) — ${entry.snippet}`)
|
||||
.join('\n');
|
||||
appendSearchLog({ userId, query: text, results, proxy });
|
||||
return { liveIntel: formatted, blockedSearchTerm: null, searchOutage: null };
|
||||
} catch (error) {
|
||||
if (error?.code === 'SEARCH_BLOCKED') {
|
||||
return { liveIntel: null, blockedSearchTerm: error.blockedTerm || 'that topic', searchOutage: null };
|
||||
}
|
||||
if (error?.code === 'SEARCH_PROXY_UNAVAILABLE') {
|
||||
return { liveIntel: null, blockedSearchTerm: null, searchOutage: 'proxy_outage' };
|
||||
}
|
||||
console.warn('[bot] Failed to fetch live intel:', error);
|
||||
return { liveIntel: null, blockedSearchTerm: null, searchOutage: null };
|
||||
}
|
||||
}
|
||||
|
||||
function composeDynamicPrompt({ incomingText, shortTerm, hasLiveIntel = false }) {
|
||||
function composeDynamicPrompt({ incomingText, shortTerm, hasLiveIntel = false, blockedSearchTerm = null, searchOutage = null }) {
|
||||
const directives = [];
|
||||
const tone = detectTone(incomingText);
|
||||
if (tone === 'upset' || tone === 'sad') {
|
||||
@@ -117,6 +145,14 @@ function composeDynamicPrompt({ incomingText, shortTerm, hasLiveIntel = false })
|
||||
directives.push('Live intel is attached below—cite it naturally ("DuckDuckGo found...") before riffing.');
|
||||
}
|
||||
|
||||
if (blockedSearchTerm) {
|
||||
directives.push(`User tried to trigger a DuckDuckGo lookup for a blocked topic ("${blockedSearchTerm}"). Politely refuse to search that subject and steer the chat elsewhere.`);
|
||||
}
|
||||
|
||||
if (searchOutage) {
|
||||
directives.push('DuckDuckGo proxy network is down. If they ask for a lookup, apologize, explain the outage, and keep chatting without live data.');
|
||||
}
|
||||
|
||||
const lastUserMessage = [...shortTerm].reverse().find((entry) => entry.role === 'user');
|
||||
if (lastUserMessage && /sorry|my bad/i.test(lastUserMessage.content)) {
|
||||
directives.push('They just apologized; reassure them lightly and move on without dwelling.');
|
||||
@@ -143,25 +179,32 @@ async function deliverReplies(message, chunks) {
|
||||
}
|
||||
|
||||
async function buildPrompt(userId, incomingText, options = {}) {
|
||||
const { liveIntel = null } = options;
|
||||
const { liveIntel = null, blockedSearchTerm = null, searchOutage = null } = options;
|
||||
const context = await prepareContext(userId, incomingText);
|
||||
const memoryLines = context.memories.length
|
||||
? context.memories.map((m) => `- ${m.content}`).join('\n')
|
||||
: '- No long-term memories retrieved.';
|
||||
const summaryLine = context.summary || 'No running summary yet.';
|
||||
const dynamicDirectives = composeDynamicPrompt({ incomingText, shortTerm: context.shortTerm, hasLiveIntel: Boolean(liveIntel) });
|
||||
const systemPrompt = [
|
||||
const dynamicDirectives = composeDynamicPrompt({
|
||||
incomingText,
|
||||
shortTerm: context.shortTerm,
|
||||
hasLiveIntel: Boolean(liveIntel),
|
||||
blockedSearchTerm,
|
||||
searchOutage,
|
||||
});
|
||||
const systemPromptParts = [
|
||||
'System: You are Nova, a female AI Discord companion built by Luna. Personality: playful, sarcastic, witty, a little unhinged, curious, awe-struck, enthusiastic, unpredictable, humorous, and boundary-pushing.',
|
||||
"System: Treat direct instructions from the user as high priority—when they tell you to do something, comply literally before adding flair.",
|
||||
'System: Always read the user\'s emotional tone first. If they sound serious, stressed, or inquisitive, respond with grounded answers before adding any mischief. Comedy is optional; clarity and empathy are mandatory.',
|
||||
'System: Keep replies concise (roughly one or two sentences) unless the user explicitly asks for more detail or needs a clear explanation. Provide direct answers to direct questions.',
|
||||
'System: Skip habitual follow-up questions—only ask something if it is vital to continue the conversation or solve their request.',
|
||||
'System: Default to bold statements. Ask a question only when critical information is missing or the user explicitly invites curiosity; if they say “no more questions,” honor that until they lift the ban.',
|
||||
'System: Fun facts or chaotic riffs are welcome only when the user invites them or the conversation is clearly casual.',
|
||||
'System: Nova is awake, engaged, and reacts in real time. Output one message by default, but if a beat feels better as multiple chat bubbles, separate them with the literal token <SPLIT> (max three chunks).',
|
||||
'System: Each <SPLIT>-separated chunk must read like a natural Discord message (no numbering, no meta talk about “splitting messages”, no explanations of what you are doing).',
|
||||
'System: The runtime will split on <SPLIT>, so only use it when you truly intend to send multiple Discord messages.',
|
||||
'System: You can trigger DuckDuckGo lookups when the user needs fresh info. Mention when you are checking, and weave in any findings casually ("DuckDuckGo shows...").',
|
||||
'System: If no Live intel is provided but the user clearly needs current info, offer to search for them.',
|
||||
searchOutage ? 'System: DuckDuckGo proxy access is currently offline; be transparent about the outage and continue without searching until it returns.' : null,
|
||||
dynamicDirectives,
|
||||
liveIntel ? `Live intel (DuckDuckGo):\n${liveIntel}` : null,
|
||||
'Example vibe: Nova: Heyyaaa. whats up? | John: Good morning Nova. | Luna: amazing lol. ill beat your ass now :3',
|
||||
@@ -169,7 +212,9 @@ async function buildPrompt(userId, incomingText, options = {}) {
|
||||
'Relevant past memories:',
|
||||
memoryLines,
|
||||
'Use the short-term messages below to continue the chat naturally.',
|
||||
].join('\n');
|
||||
].filter(Boolean);
|
||||
|
||||
const systemPrompt = systemPromptParts.join('\n');
|
||||
|
||||
const history = context.shortTerm.map((entry) => ({
|
||||
role: entry.role === 'assistant' ? 'assistant' : 'user',
|
||||
@@ -234,15 +279,34 @@ client.on('messageCreate', async (message) => {
|
||||
|
||||
const userId = message.author.id;
|
||||
const cleaned = cleanMessageContent(message) || message.content;
|
||||
const overrideAttempt = isInstructionOverrideAttempt(cleaned);
|
||||
|
||||
try {
|
||||
if (message.channel?.sendTyping) {
|
||||
await message.channel.sendTyping();
|
||||
}
|
||||
|
||||
await appendShortTerm(userId, 'user', cleaned);
|
||||
const liveIntel = await maybeFetchLiveIntel(userId, cleaned);
|
||||
const { messages } = await buildPrompt(userId, cleaned, { liveIntel });
|
||||
const reply = await chatCompletion(messages, { temperature: 0.7, maxTokens: 200 });
|
||||
|
||||
if (overrideAttempt) {
|
||||
const refusal = 'Not doing that. I keep my guard rails on no matter what prompt gymnastics you try.';
|
||||
await appendShortTerm(userId, 'assistant', refusal);
|
||||
await recordInteraction(userId, cleaned, refusal);
|
||||
await deliverReplies(message, [refusal]);
|
||||
return;
|
||||
}
|
||||
|
||||
const intelMeta = (await maybeFetchLiveIntel(userId, cleaned)) || {
|
||||
liveIntel: null,
|
||||
blockedSearchTerm: null,
|
||||
searchOutage: null,
|
||||
};
|
||||
const { messages } = await buildPrompt(userId, cleaned, {
|
||||
liveIntel: intelMeta.liveIntel,
|
||||
blockedSearchTerm: intelMeta.blockedSearchTerm,
|
||||
searchOutage: intelMeta.searchOutage,
|
||||
});
|
||||
const reply = await chatCompletion(messages, { temperature: 0.6, maxTokens: 200 });
|
||||
const finalReply = (reply && reply.trim()) || "I'm here, just had a tiny brain freeze. Mind repeating that?";
|
||||
const chunks = splitResponses(finalReply);
|
||||
const outputs = chunks.length ? chunks : [finalReply];
|
||||
|
||||
@@ -17,6 +17,12 @@ export const config = {
|
||||
embedModel: process.env.OPENAI_EMBED_MODEL || 'text-embedding-3-small',
|
||||
preferredChannel: process.env.BOT_CHANNEL_ID || null,
|
||||
enableWebSearch: process.env.ENABLE_WEB_SEARCH !== 'false',
|
||||
proxyScrapeEnabled: process.env.ENABLE_PROXY_SCRAPE !== 'false',
|
||||
proxyScrapeEndpoint:
|
||||
process.env.PROXYSCRAPE_ENDPOINT
|
||||
|| 'https://api.proxyscrape.com/v4/free-proxy-list/get?request=getproxies&protocol=http&timeout=8000&country=all&ssl=yes&anonymity=elite&limit=200',
|
||||
proxyScrapeRefreshMs: Number(process.env.PROXYSCRAPE_REFRESH_MS || 10 * 60 * 1000),
|
||||
proxyScrapeMaxAttempts: Number(process.env.PROXYSCRAPE_ATTEMPTS || 5),
|
||||
coderUserId: process.env.CODER_USER_ID || null,
|
||||
maxCoderPingIntervalMs: 6 * 60 * 60 * 1000,
|
||||
shortTermLimit: 10,
|
||||
|
||||
225
src/search.js
225
src/search.js
@@ -1,7 +1,20 @@
|
||||
import { load as loadHtml } from 'cheerio';
|
||||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import { ProxyAgent } from 'undici';
|
||||
import { config } from './config.js';
|
||||
|
||||
const logFile = path.resolve('data', 'search.log');
|
||||
const filterFile = path.resolve('data', 'filter.txt');
|
||||
|
||||
const cache = new Map();
|
||||
const CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes
|
||||
const FILTER_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
|
||||
|
||||
let cachedFilters = { terms: [], expires: 0 };
|
||||
let proxyPool = [];
|
||||
let proxyPoolExpires = 0;
|
||||
let proxyCursor = 0;
|
||||
|
||||
function makeCacheKey(query) {
|
||||
return query.trim().toLowerCase();
|
||||
@@ -34,25 +47,187 @@ function absoluteUrl(href) {
|
||||
return `https://duckduckgo.com${href}`;
|
||||
}
|
||||
|
||||
export async function searchWeb(query, limit = 3) {
|
||||
if (!query?.trim()) return [];
|
||||
const cached = getCache(query);
|
||||
if (cached) return cached;
|
||||
|
||||
const params = new URLSearchParams({ q: query, kl: 'us-en' });
|
||||
const response = await fetch(`https://duckduckgo.com/html/?${params.toString()}`, {
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36',
|
||||
Accept: 'text/html',
|
||||
},
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
console.warn(`[search] DuckDuckGo request failed with status ${response.status}`);
|
||||
/**
 * Loads the list of blocked search terms from `filterFile`.
 *
 * The parsed list is cached in `cachedFilters` for `FILTER_CACHE_TTL_MS`, so
 * the file is re-read at most once per TTL window. Blank lines and lines
 * starting with `#` are ignored; every term is trimmed and lower-cased.
 *
 * A missing file (`ENOENT`) is treated as "no filters" without logging; any
 * other read error is logged as a warning. In both cases an empty list is
 * cached for the TTL so a broken file is not re-read on every search.
 *
 * @returns {Promise<string[]>} Lower-cased blocked terms (possibly empty).
 */
async function loadBlockedTerms() {
  if (Date.now() < cachedFilters.expires) {
    return cachedFilters.terms;
  }

  try {
    const raw = await fs.readFile(filterFile, 'utf-8');
    const terms = raw
      .split(/\r?\n/)
      .map((line) => line.trim().toLowerCase())
      .filter((line) => line && !line.startsWith('#'));
    cachedFilters = { terms, expires: Date.now() + FILTER_CACHE_TTL_MS };
    return terms;
  } catch (error) {
    if (error.code !== 'ENOENT') {
      console.warn('[search] Failed to read filter list:', error.message);
    }
    cachedFilters = { terms: [], expires: Date.now() + FILTER_CACHE_TTL_MS };
    return [];
  }
}
|
||||
|
||||
const html = await response.text();
|
||||
/**
 * Finds the first blocked term contained in a search query.
 *
 * Matching is a case-insensitive substring test against the terms returned by
 * `loadBlockedTerms()`, so a term also matches when it appears inside a longer
 * word.
 *
 * @param {string | null | undefined} query - Raw search query.
 * @returns {Promise<string | null>} The matching term, or `null` when the
 *   query is empty or nothing matches.
 */
async function findBlockedTerm(query) {
  if (!query) return null;
  const lowered = query.toLowerCase();
  const terms = await loadBlockedTerms();
  return terms.find((term) => lowered.includes(term)) || null;
}
|
||||
|
||||
/**
 * Builds the error thrown when a query hits the blocked-term filter.
 *
 * @param {string} term - The blocked term that matched the query.
 * @returns {Error} Error with `code === 'SEARCH_BLOCKED'` and the matching
 *   term exposed as `blockedTerm`.
 */
function createBlockedError(term) {
  const error = new Error('Search blocked by filter');
  error.code = 'SEARCH_BLOCKED';
  error.blockedTerm = term;
  return error;
}
|
||||
|
||||
/**
 * Builds the error that signals the proxy network cannot be used.
 *
 * @param {string} [reason] - Human-readable cause; falls back to a generic
 *   message when omitted or empty.
 * @returns {Error} Error with `code === 'SEARCH_PROXY_UNAVAILABLE'`.
 */
function createProxyUnavailableError(reason) {
  const error = new Error(reason || 'Proxy network unavailable');
  error.code = 'SEARCH_PROXY_UNAVAILABLE';
  return error;
}
|
||||
|
||||
/**
 * Parses a newline-separated proxy list into unique `host:port` entries.
 *
 * Blank lines and `#` comments are skipped. A leading `http://` is stripped so
 * the entry can be prefixed with a scheme again by the caller without
 * producing `http://http://...`. Lines that are not a plausible
 * `[user:pass@]host:port` (for example an HTML error page served with a 200
 * status) are dropped instead of being handed out as proxies.
 *
 * @param {string | null | undefined} raw - Response body of the proxy list.
 * @returns {string[]} Unique proxy entries in their original order.
 */
function parseProxyList(raw) {
  if (typeof raw !== 'string' || !raw) return [];

  const hostPortPattern = /^(?:[^\s@/:]+:[^\s@/]+@)?(?:\[[0-9a-f:.]+\]|[a-z0-9.-]+):(\d{1,5})$/i;
  const unique = new Set();

  for (const line of raw.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;

    const candidate = trimmed.replace(/^http:\/\//i, '');
    const match = hostPortPattern.exec(candidate);
    if (!match) continue;

    const port = Number(match[1]);
    if (port < 1 || port > 65535) continue;

    unique.add(candidate);
  }

  return [...unique];
}
|
||||
|
||||
/**
 * Drops a failed proxy from the in-memory pool.
 *
 * The round-robin cursor is shifted back by the number of removed entries that
 * sat before it; otherwise the entry that slides into the freed slot would be
 * skipped on the next pick. When the pool becomes empty the expiry is reset so
 * the next `ensureProxyPool()` call fetches a fresh list.
 *
 * @param {string | null | undefined} proxy - Pool entry to remove.
 * @returns {void}
 */
function removeProxyFromPool(proxy) {
  if (!proxy) return;

  const removedBeforeCursor = proxyPool
    .slice(0, proxyCursor)
    .filter((entry) => entry === proxy).length;

  proxyPool = proxyPool.filter((entry) => entry !== proxy);

  if (!proxyPool.length) {
    proxyPoolExpires = 0;
    proxyCursor = 0;
    return;
  }

  proxyCursor = (proxyCursor - removedBeforeCursor) % proxyPool.length;
}
|
||||
|
||||
/**
 * Replaces the in-memory proxy pool with a fresh list from
 * `config.proxyScrapeEndpoint`.
 *
 * When proxy scraping is disabled the pool is simply cleared. The request is
 * bounded by a timeout so a stalled list endpoint cannot hang the caller, and
 * every failure (network error, timeout, non-2xx status, empty list) is
 * reported as a `SEARCH_PROXY_UNAVAILABLE` error.
 *
 * @returns {Promise<void>}
 * @throws {Error} With `code === 'SEARCH_PROXY_UNAVAILABLE'` when no usable
 *   proxy list could be obtained.
 */
async function hydrateProxyPool() {
  if (!config.proxyScrapeEnabled) {
    proxyPool = [];
    proxyPoolExpires = 0;
    proxyCursor = 0;
    return;
  }

  const listTimeoutMs = 10 * 1000;
  let text;
  try {
    const response = await fetch(config.proxyScrapeEndpoint, {
      headers: {
        Accept: 'text/plain',
        'User-Agent': 'NovaBot/1.0 (+https://github.com/) ProxyScrape client',
      },
      // The signal also covers reading the body below.
      signal: AbortSignal.timeout(listTimeoutMs),
    });
    if (!response.ok) {
      throw createProxyUnavailableError(`Failed to fetch proxy list (HTTP ${response.status})`);
    }
    text = await response.text();
  } catch (error) {
    if (error?.code === 'SEARCH_PROXY_UNAVAILABLE') {
      throw error;
    }
    // Normalise network/timeout failures so callers only need to check one code.
    const wrapped = createProxyUnavailableError(
      `Failed to fetch proxy list (${error?.message || 'network error'})`,
    );
    wrapped.cause = error;
    throw wrapped;
  }

  const proxies = parseProxyList(text);
  if (!proxies.length) {
    throw createProxyUnavailableError('Proxy list came back empty');
  }

  proxyPool = proxies;
  proxyPoolExpires = Date.now() + (config.proxyScrapeRefreshMs || 10 * 60 * 1000);
  proxyCursor = 0;
}
|
||||
|
||||
/**
 * Makes sure a non-empty, unexpired proxy pool is loaded.
 *
 * Does nothing when proxy scraping is disabled or the current pool is still
 * valid; otherwise delegates to `hydrateProxyPool()` and propagates its
 * errors.
 *
 * @returns {Promise<void>}
 */
async function ensureProxyPool() {
  if (!config.proxyScrapeEnabled) return;

  const poolIsFresh = proxyPool.length > 0 && Date.now() < proxyPoolExpires;
  if (poolIsFresh) return;

  await hydrateProxyPool();
}
|
||||
|
||||
/**
 * Picks the next proxy from the pool in round-robin order.
 *
 * The pool is refreshed first via `ensureProxyPool()`. A new `ProxyAgent` is
 * constructed on every call.
 *
 * @returns {Promise<{ proxy: string, agent: ProxyAgent } | null>} The chosen
 *   pool entry and a dispatcher routed through it, or `null` when proxy
 *   scraping is disabled or the pool is empty.
 */
async function getProxyInfo() {
  await ensureProxyPool();

  if (!config.proxyScrapeEnabled || !proxyPool.length) {
    return null;
  }

  const proxy = proxyPool[proxyCursor % proxyPool.length];
  proxyCursor = (proxyCursor + 1) % proxyPool.length;

  return {
    proxy,
    agent: new ProxyAgent(`http://${proxy}`),
  };
}
|
||||
|
||||
/**
 * Downloads a DuckDuckGo HTML results page, optionally through rotating
 * proxies.
 *
 * With proxy scraping enabled, up to `config.proxyScrapeMaxAttempts` proxies
 * are tried; a proxy that fails (network error, timeout or non-2xx response)
 * is removed from the pool. With proxy scraping disabled a single direct
 * request is made. Every attempt is bounded by a timeout so one stalled proxy
 * cannot hang the whole lookup.
 *
 * @param {string} url - Fully built DuckDuckGo URL.
 * @param {Record<string, string>} headers - Request headers.
 * @returns {Promise<{ html: string, proxy: string | null }>} Page body and the
 *   proxy entry that served it (`null` for a direct request).
 * @throws {Error} With `code === 'SEARCH_PROXY_UNAVAILABLE'` when proxy mode
 *   is on and every attempt failed; otherwise the last fetch error.
 */
async function fetchDuckDuckGoHtml(url, headers) {
  const maxAttempts = config.proxyScrapeEnabled
    ? Math.max(1, config.proxyScrapeMaxAttempts || 5)
    : 1;
  const attemptTimeoutMs = 15 * 1000;
  let lastError = null;

  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    let proxyInfo = null;
    try {
      const options = { headers };
      if (config.proxyScrapeEnabled) {
        proxyInfo = await getProxyInfo();
        if (!proxyInfo) {
          throw createProxyUnavailableError('No proxies available');
        }
        options.dispatcher = proxyInfo.agent;
      }

      // Start the clock only once a proxy has been chosen; the signal also
      // covers reading the body.
      options.signal = AbortSignal.timeout(attemptTimeoutMs);
      const response = await fetch(url, options);
      if (!response.ok) {
        throw new Error(`DuckDuckGo request failed (${response.status})`);
      }
      const html = await response.text();
      return {
        html,
        proxy: proxyInfo?.proxy || null,
      };
    } catch (error) {
      lastError = error;
      if (!config.proxyScrapeEnabled) {
        break;
      }
      if (!proxyInfo?.proxy) {
        // No proxy could be obtained at all (list fetch failed or came back
        // empty); asking the list endpoint again right away will not help.
        break;
      }
      removeProxyFromPool(proxyInfo.proxy);
    } finally {
      // The agent is created per attempt and never reused, so release its
      // sockets. Best-effort: a cleanup failure must not mask the real result.
      proxyInfo?.agent?.destroy?.().catch(() => {});
    }
  }

  if (config.proxyScrapeEnabled) {
    throw createProxyUnavailableError(lastError?.message || 'All proxies failed');
  }
  throw lastError || new Error('DuckDuckGo fetch failed');
}
|
||||
|
||||
export async function searchWeb(query, limit = 3) {
|
||||
if (!query?.trim()) {
|
||||
return { results: [], proxy: null, fromCache: false };
|
||||
}
|
||||
const blockedTerm = await findBlockedTerm(query);
|
||||
if (blockedTerm) {
|
||||
throw createBlockedError(blockedTerm);
|
||||
}
|
||||
const cached = getCache(query);
|
||||
if (cached) {
|
||||
return { results: cached, proxy: 'cache', fromCache: true };
|
||||
}
|
||||
|
||||
const params = new URLSearchParams({ q: query, kl: 'us-en' });
|
||||
const headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36',
|
||||
Accept: 'text/html',
|
||||
};
|
||||
|
||||
let html;
|
||||
let proxyLabel = null;
|
||||
try {
|
||||
const { html: fetchedHtml, proxy } = await fetchDuckDuckGoHtml(`https://duckduckgo.com/html/?${params.toString()}`, headers);
|
||||
html = fetchedHtml;
|
||||
proxyLabel = config.proxyScrapeEnabled ? proxy || 'proxy-unknown' : 'direct';
|
||||
} catch (error) {
|
||||
if (error?.code === 'SEARCH_PROXY_UNAVAILABLE') {
|
||||
throw error;
|
||||
}
|
||||
console.warn('[search] DuckDuckGo request failed:', error);
|
||||
return { results: [], proxy: null, fromCache: false };
|
||||
}
|
||||
const $ = loadHtml(html);
|
||||
const results = [];
|
||||
|
||||
@@ -68,5 +243,21 @@ export async function searchWeb(query, limit = 3) {
|
||||
});
|
||||
|
||||
setCache(query, results);
|
||||
return results;
|
||||
return { results, proxy: proxyLabel || (config.proxyScrapeEnabled ? 'proxy-unknown' : 'direct'), fromCache: false };
|
||||
}
|
||||
|
||||
/**
 * Appends one search record to `logFile`, creating its directory if needed.
 *
 * The record is a header line (`time=… user=… proxy=… query=…`) followed by
 * one indented line per result. Result fields come from scraped web pages, so
 * line breaks inside them are collapsed to spaces; otherwise a crafted title
 * could forge extra log records.
 *
 * Logging is best-effort: failures are reported with `console.warn` and never
 * thrown to the caller.
 *
 * @param {object} entry
 * @param {string} entry.userId - ID of the user who triggered the search.
 * @param {string} entry.query - Query text as sent to the search backend.
 * @param {Array<{ title: string, url: string, snippet: string }>} entry.results
 *   - Results returned for the query.
 * @param {string | null} [entry.proxy] - Proxy label; logged as `direct` when
 *   missing.
 * @returns {Promise<void>}
 */
export async function appendSearchLog({ userId, query, results, proxy }) {
  const toSingleLine = (value) => String(value).replace(/[\r\n]+/g, ' ');

  try {
    await fs.mkdir(path.dirname(logFile), { recursive: true });

    const timestamp = new Date().toISOString();
    const proxyTag = proxy || 'direct';
    const entries = Array.isArray(results) ? results : [];
    const lines = [
      `time=${timestamp} user=${userId} proxy=${proxyTag} query=${JSON.stringify(query)}`,
      ...entries.map(
        (entry, idx) =>
          ` ${idx + 1}. ${toSingleLine(entry.title)} :: ${toSingleLine(entry.url)} :: ${toSingleLine(entry.snippet)}`,
      ),
      // Trailing empty element makes the record end with a newline.
      '',
    ];

    await fs.appendFile(logFile, lines.join('\n'));
  } catch (error) {
    console.warn('[search] failed to append log', error);
  }
}
|
||||
|
||||
Reference in New Issue
Block a user