// File-view banner (not part of the program): 166 lines · 4.8 KiB · JavaScript
import { load as loadHtml } from 'cheerio';
|
|
import { promises as fs } from 'fs';
|
|
import path from 'path';
|
|
|
|
// On-disk locations, resolved against the process working directory.
// `logFile` is appended to by appendSearchLog(); `filterFile` is read by
// loadBlockedTerms().
const logFile = path.resolve('data', 'search.log');
const filterFile = path.resolve('data', 'filter.txt');

// In-memory search-result cache: normalized key -> { data, expires }.
const cache = new Map();
const CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes
const FILTER_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes

// Last loaded blocklist. `expires: 0` forces the first loadBlockedTerms()
// call to read the filter file.
let cachedFilters = { terms: [], expires: 0 };
|
|
|
|
/**
 * Normalizes a query into a cache key: surrounding whitespace is removed and
 * the text is lower-cased, so "  Cats " and "cats" share one entry.
 *
 * @param {unknown} query - Query text; null/undefined are treated as an empty
 *   string and other non-strings are stringified instead of throwing.
 * @returns {string} The normalized key.
 */
function makeCacheKey(query) {
  return String(query ?? '').trim().toLowerCase();
}
|
|
|
|
/**
 * Stores `data` under the normalized form of `query` for CACHE_TTL_MS.
 *
 * Expired entries are purged on every insert. Otherwise an entry is only
 * removed when the very same key is looked up again after its deadline, so
 * one-off queries would stay in the Map forever.
 *
 * @param {string} query - Query text; normalized with makeCacheKey().
 * @param {*} data - Value to cache.
 * @returns {void}
 */
function setCache(query, data) {
  const now = Date.now();

  // Deleting the current entry while iterating a Map is well-defined.
  for (const [storedKey, entry] of cache) {
    if (now > entry.expires) {
      cache.delete(storedKey);
    }
  }

  cache.set(makeCacheKey(query), { data, expires: now + CACHE_TTL_MS });
}
|
|
|
|
/**
 * Looks up the cached value for `query`.
 *
 * An entry counts as expired once the clock reaches its `expires` timestamp,
 * the same boundary loadBlockedTerms() uses for the filter cache. Expired
 * entries are removed as a side effect of the lookup.
 *
 * @param {string} query - Query text; normalized with makeCacheKey().
 * @returns {*} The cached data, or null when absent or expired.
 */
function getCache(query) {
  const key = makeCacheKey(query);
  const entry = cache.get(key);
  if (!entry) {
    return null;
  }

  if (Date.now() >= entry.expires) {
    cache.delete(key);
    return null;
  }

  return entry.data;
}
|
|
|
|
/**
 * Collapses every run of whitespace (including newlines and tabs) into a
 * single space and trims the ends.
 *
 * @param {unknown} text - Raw text; any falsy value yields ''. Truthy
 *   non-strings are stringified rather than throwing.
 * @returns {string} The single-line, trimmed text.
 */
function sanitizeText(text) {
  if (!text) {
    return '';
  }
  return String(text).replace(/\s+/g, ' ').trim();
}
|
|
|
|
/**
 * Turns an href taken from a DuckDuckGo result into an absolute http(s) URL.
 *
 * - hrefs that already start with http:// or https:// are returned as given
 *   (after trimming surrounding whitespace);
 * - everything else is resolved against https://duckduckgo.com/ with the
 *   WHATWG URL parser, so root-relative ("/l/?x"), protocol-relative
 *   ("//host/path") and slash-less relative ("html/?q=a") hrefs all produce
 *   well-formed URLs instead of a naive string concatenation;
 * - hrefs that resolve to a non-http(s) scheme (e.g. "javascript:") or that
 *   cannot be parsed yield ''.
 *
 * @param {string|undefined|null} href - Raw href attribute value.
 * @returns {string} Absolute URL, or '' when there is nothing usable.
 */
function absoluteUrl(href) {
  const trimmed = typeof href === 'string' ? href.trim() : '';
  if (!trimmed) {
    return '';
  }

  if (/^https?:\/\//i.test(trimmed)) {
    return trimmed;
  }

  try {
    const resolved = new URL(trimmed, 'https://duckduckgo.com/');
    const isWeb = resolved.protocol === 'http:' || resolved.protocol === 'https:';
    return isWeb ? resolved.href : '';
  } catch {
    return '';
  }
}
|
|
|
|
/**
 * Returns the list of blocked terms, reading `filterFile` at most once per
 * FILTER_CACHE_TTL_MS.
 *
 * File format: one term per line; terms are trimmed and lower-cased; blank
 * lines and lines starting with '#' are ignored.
 *
 * Failure handling:
 * - missing file (ENOENT): no filter is configured, so the list is empty;
 * - any other read error: a warning is logged and the previously loaded
 *   terms are kept, so a transient I/O failure does not silently switch an
 *   existing blocklist off for the whole TTL.
 * Either outcome is cached for FILTER_CACHE_TTL_MS, so a failing file is not
 * re-read on every call.
 *
 * @returns {Promise<string[]>} Lower-cased blocked terms; never rejects.
 */
async function loadBlockedTerms() {
  if (Date.now() < cachedFilters.expires) {
    return cachedFilters.terms;
  }

  let terms;
  try {
    const raw = await fs.readFile(filterFile, 'utf-8');
    terms = raw
      .split(/\r?\n/)
      .map((line) => line.trim().toLowerCase())
      .filter((line) => line && !line.startsWith('#'));
  } catch (error) {
    if (error.code === 'ENOENT') {
      terms = [];
    } else {
      console.warn('[search] Failed to read filter list:', error.message);
      terms = cachedFilters.terms;
    }
  }

  cachedFilters = { terms, expires: Date.now() + FILTER_CACHE_TTL_MS };
  return terms;
}
|
|
|
|
/**
 * Finds the first blocked term that occurs in `query`.
 *
 * Matching is a case-insensitive substring test against the terms returned
 * by loadBlockedTerms().
 *
 * @param {unknown} query - Text to check. Falsy values short-circuit to null
 *   without loading the term list; truthy non-strings are stringified.
 * @returns {Promise<string|null>} The matching term, or null when none match.
 */
async function findBlockedTerm(query) {
  if (!query) {
    return null;
  }

  const haystack = String(query).toLowerCase();
  const terms = await loadBlockedTerms();
  const hit = terms.find((term) => haystack.includes(term));
  return hit || null;
}
|
|
|
|
/**
 * Public wrapper around findBlockedTerm(): reports which blocked term, if
 * any, appears in `text`.
 *
 * @param {string} text - Text to check.
 * @returns {Promise<string|null>} Whatever findBlockedTerm(text) resolves to.
 */
export async function detectFilteredPhrase(text) {
  return findBlockedTerm(text);
}
|
|
|
|
/**
 * Builds the error thrown when a query matches the blocklist.
 *
 * @param {string} term - The blocked term that matched.
 * @returns {Error} Error with `code === 'SEARCH_BLOCKED'` and the matching
 *   term in `blockedTerm`.
 */
function createBlockedError(term) {
  return Object.assign(new Error('Search blocked by filter'), {
    code: 'SEARCH_BLOCKED',
    blockedTerm: term,
  });
}
|
|
|
|
/**
 * Builds the error thrown when the search backend cannot be reached or
 * answers with a failure.
 *
 * @param {string} [reason] - Message; falls back to
 *   'Search network unavailable' when empty or omitted.
 * @param {unknown} [cause] - Optional underlying error, stored on `cause` so
 *   the original failure is not lost. Omitting it leaves the error without a
 *   `cause`.
 * @returns {Error} Error with `code === 'SEARCH_NETWORK_UNAVAILABLE'`.
 */
function createSearchUnavailableError(reason, cause) {
  const error = new Error(reason || 'Search network unavailable');
  error.code = 'SEARCH_NETWORK_UNAVAILABLE';
  if (cause !== undefined) {
    error.cause = cause;
  }
  return error;
}
|
|
|
|
/**
 * Extracts up to `limit` results from a DuckDuckGo HTML results page.
 *
 * Each `.result` element contributes one entry built from its
 * `.result__title` text, `.result__url` href and `.result__snippet` text.
 * Elements without a title or without a usable URL are skipped and do not
 * count towards the limit.
 *
 * @param {string} html - Markup of the results page.
 * @param {number} limit - Maximum number of entries to return.
 * @returns {{title: string, url: string, snippet: string}[]} Parsed entries
 *   in document order.
 */
function parseDuckDuckGoResults(html, limit) {
  const $ = loadHtml(html);
  const results = [];

  $('.result').each((_, el) => {
    // Returning false from the callback stops the iteration early.
    if (results.length >= limit) {
      return false;
    }

    const node = $(el);
    const title = sanitizeText(node.find('.result__title').text());
    const url = absoluteUrl(node.find('.result__url').attr('href'));
    const snippet = sanitizeText(node.find('.result__snippet').text());

    if (title && url) {
      results.push({ title, url, snippet });
    }
    return undefined;
  });

  return results;
}
|
|
|
|
/**
 * Searches DuckDuckGo's HTML endpoint and returns up to `limit` results.
 *
 * Flow: blank queries return an empty result set; queries containing a
 * blocked term are rejected; otherwise the in-memory cache is consulted
 * before the page is fetched, parsed and cached.
 *
 * Cache entries are keyed by query AND limit. Keying by query alone would
 * hand a result list truncated for one limit to a later call that asked for
 * a different number of results.
 *
 * @param {string} query - Search text.
 * @param {number} [limit=3] - Maximum number of results.
 * @returns {Promise<{results: {title: string, url: string, snippet: string}[],
 *   proxy: string, fromCache: boolean}>} `proxy` is 'duckduckgo-cache' on a
 *   cache hit and 'duckduckgo' otherwise.
 * @throws {Error} `code === 'SEARCH_BLOCKED'` (with `blockedTerm`) when the
 *   query matches the blocklist.
 * @throws {Error} `code === 'SEARCH_NETWORK_UNAVAILABLE'` when the request
 *   fails or times out, the response status is not OK, or the body cannot be
 *   read; `cause` holds the underlying error when there is one.
 */
export async function searchWeb(query, limit = 3) {
  if (!query?.trim()) {
    return { results: [], proxy: 'duckduckgo', fromCache: false };
  }

  const blockedTerm = await findBlockedTerm(query);
  if (blockedTerm) {
    throw createBlockedError(blockedTerm);
  }

  // The limit goes last on its own line, so no query text can imitate it.
  const cacheId = `${query.trim()}\nlimit=${limit}`;
  const cached = getCache(cacheId);
  if (cached) {
    return { results: cached, proxy: 'duckduckgo-cache', fromCache: true };
  }

  // Upper bound on the request so a stalled connection cannot hang the caller.
  const requestTimeoutMs = 10000;
  const params = new URLSearchParams({ q: query, kl: 'us-en' });

  let response;
  try {
    response = await fetch(`https://duckduckgo.com/html/?${params.toString()}`, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36',
        Accept: 'text/html',
      },
      signal: AbortSignal.timeout(requestTimeoutMs),
    });
  } catch (error) {
    console.warn('[search] DuckDuckGo request failed:', error.message);
    throw Object.assign(createSearchUnavailableError('DuckDuckGo request failed'), { cause: error });
  }

  if (!response.ok) {
    console.warn(`[search] DuckDuckGo request failed with status ${response.status}`);
    throw createSearchUnavailableError(`DuckDuckGo response ${response.status}`);
  }

  let html;
  try {
    // Reading the body can still fail (dropped connection, timeout).
    html = await response.text();
  } catch (error) {
    console.warn('[search] DuckDuckGo response body could not be read:', error.message);
    throw Object.assign(
      createSearchUnavailableError('DuckDuckGo response body could not be read'),
      { cause: error },
    );
  }

  const results = parseDuckDuckGoResults(html, limit);
  setCache(cacheId, results);
  return { results, proxy: 'duckduckgo', fromCache: false };
}
|
|
|
|
/**
 * Appends one search record to `logFile`, creating its directory if needed.
 *
 * Record layout: a header line
 * `time=<ISO timestamp> user=<userId> proxy=<proxy> query=<JSON string>`,
 * followed by one numbered line per result and a trailing newline.
 *
 * Logging is best-effort: any failure is reported with console.warn and
 * never propagates to the caller.
 *
 * @param {object} entry
 * @param {*} entry.userId - Identifier written after `user=`.
 * @param {string} entry.query - Search text; written JSON-encoded.
 * @param {{title: string, url: string, snippet: string}[]} [entry.results] -
 *   Results to list. A missing or non-array value is logged as "no results"
 *   instead of throwing and losing the header line as well.
 * @param {string} [entry.proxy] - Source tag; defaults to 'duckduckgo'.
 * @returns {Promise<void>}
 */
export async function appendSearchLog({ userId, query, results, proxy }) {
  try {
    await fs.mkdir(path.dirname(logFile), { recursive: true });

    const timestamp = new Date().toISOString();
    const proxyTag = proxy || 'duckduckgo';
    const entries = Array.isArray(results) ? results : [];

    const lines = [
      `time=${timestamp} user=${userId} proxy=${proxyTag} query=${JSON.stringify(query)}`,
      ...entries.map((entry, idx) => ` ${idx + 1}. ${entry.title} :: ${entry.url} :: ${entry.snippet}`),
      '', // makes the joined text end with a newline
    ];

    await fs.appendFile(logFile, lines.join('\n'));
  } catch (error) {
    console.warn('[search] failed to append log', error);
  }
}
|