import OpenAI from 'openai'
import type {
  ScrapedContent,
  EnhancedProductAnalysis,
  Feature,
  Problem,
  Persona,
  Keyword,
  UseCase,
  Competitor,
  DorkQuery,
} from './types'

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

/** Single-word terms rejected by generateKeywords regardless of their declared type. */
const GENERIC_TERMS = new Set([
  'platform',
  'solution',
  'tool',
  'software',
  'app',
  'system',
  'product',
  'service',
])

/**
 * Pulls the JSON payload out of a raw model reply.
 *
 * Preference order: the contents of the first fenced code block (``` or ```json),
 * then the span from the first `{` to the last `}`, then the trimmed reply itself.
 */
function extractJsonText(content: string): string {
  const fenced = content.match(/```(?:json)?\s*([\s\S]*?)```/)
  if (fenced && fenced[1]) return fenced[1].trim()

  const braces = content.match(/(\{[\s\S]*\})/)
  if (braces && braces[1]) return braces[1].trim()

  return content.trim()
}

/**
 * Returns `value` as an array, or throws an error naming the missing field.
 *
 * The parsed model reply is only asserted to have the requested shape, so the
 * expected array has to be checked at runtime before it is iterated.
 *
 * @throws Error when `value` is not an array.
 */
function requireArray<T>(value: unknown, field: string): T[] {
  if (!Array.isArray(value)) {
    throw new Error(`AI response is missing the "${field}" array`)
  }
  return value as T[]
}

/** Number of whitespace-separated words in `term`. */
function countWords(term: string): number {
  return term.split(/\s+/).filter(Boolean).length
}

/**
 * Wraps `text` in double quotes for use as an exact-phrase search operand.
 * Embedded double quotes are removed so they cannot unbalance the query.
 */
function quoted(text: string): string {
  return `"${text.replace(/"/g, '').trim()}"`
}

/**
 * Sends one system + user prompt pair to the chat completions endpoint and
 * parses the reply as JSON.
 *
 * The parsed value is asserted to be `T`, not validated; callers must check
 * the fields they rely on.
 *
 * @param prompt - User message.
 * @param systemPrompt - System message.
 * @param temperature - Sampling temperature (defaults to 0.3).
 * @returns The parsed JSON payload.
 * @throws Error('Invalid JSON response from AI') when the reply cannot be parsed.
 */
async function aiGenerate<T>(prompt: string, systemPrompt: string, temperature: number = 0.3): Promise<T> {
  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: prompt },
    ],
    temperature,
    max_tokens: 4000,
  })

  // A missing choice is treated like empty content: both fall back to '{}'.
  const content = response.choices[0]?.message?.content || '{}'
  const jsonStr = extractJsonText(content)

  try {
    return JSON.parse(jsonStr) as T
  } catch {
    console.error('Failed to parse JSON:', jsonStr.substring(0, 200))
    throw new Error('Invalid JSON response from AI')
  }
}

/**
 * Asks the model to list product features found in the scraped page.
 *
 * The prompt includes the title, meta description, the first 15 headings,
 * the first 10 paragraphs and the first 15 feature-list entries.
 *
 * @returns At most 20 features.
 * @throws Error when the reply has no "features" array or is not valid JSON.
 */
async function extractFeatures(content: ScrapedContent): Promise<Feature[]> {
  const systemPrompt = `Extract EVERY feature from website content. Be exhaustive.`

  // Sections are newline-separated so a label never runs into the multi-line
  // content that precedes it.
  const prompt = [
    `Extract features from:`,
    `Title: ${content.title}`,
    `Description: ${content.metaDescription}`,
    `Headings: ${content.headings.slice(0, 15).join('\n')}`,
    `Paragraphs: ${content.paragraphs.slice(0, 10).join('\n\n')}`,
    `Feature Lists: ${content.featureList.slice(0, 15).join('\n')}`,
    `Return JSON: {"features": [{"name": "...", "description": "...", "benefits": ["..."], "useCases": ["..."]}]}`,
    `Aim for 10-15 features.`,
  ].join('\n')

  const result = await aiGenerate<{ features: Feature[] }>(prompt, systemPrompt, 0.4)
  return requireArray<Feature>(result?.features, 'features').slice(0, 20)
}

/**
 * Asks the model for named competitors of the scraped product.
 *
 * Post-processing: entries without a string name are dropped, a name of the
 * form "Competitor X" is replaced by "Alternative Solution", a leading
 * "Generic " is stripped, and names of one character or less are discarded.
 *
 * @throws Error when the reply has no "competitors" array or is not valid JSON.
 */
async function identifyCompetitors(content: ScrapedContent): Promise<Competitor[]> {
  const systemPrompt = `Identify real, named competitors. Use actual company/product names like "Asana", "Jira", "Monday.com", "Trello", "Notion". Never use generic names like "Competitor A".`

  const prompt = [
    `Identify 5-6 real competitors for: ${content.title}`,
    `Description: ${content.metaDescription}`,
    `Return EXACT JSON format:`,
    `{ "competitors": [ { "name": "Asana", "differentiator": "Why this product is better", "theirStrength": "What they do well", "switchTrigger": "Why users switch", "theirWeakness": "Their main weakness" } ] }`,
    `Include: Direct competitors (same space), Big players, Popular alternatives, Tools people misuse for this.`,
    `Use ONLY real product names.`,
  ].join('\n')

  const result = await aiGenerate<{ competitors: Competitor[] }>(prompt, systemPrompt, 0.3)

  return requireArray<Competitor>(result?.competitors, 'competitors')
    // The reply is unvalidated JSON; skip entries whose name is not a string.
    .filter((competitor) => typeof competitor?.name === 'string')
    // Validate competitor names aren't generic
    .map((competitor) => ({
      ...competitor,
      name: competitor.name
        .trim()
        .replace(/^Competitor\s+[A-Z]$/i, 'Alternative Solution')
        .replace(/^Generic\s+/i, ''),
    }))
    .filter((competitor) => competitor.name.length > 1)
}

/**
 * Asks the model for search phrases and filters them down to usable keywords.
 *
 * Filtering: terms are trimmed; terms of two characters or less are dropped;
 * single-word terms survive only when typed product, competitor or
 * differentiator; multi-word terms survive up to four words; terms in
 * GENERIC_TERMS are dropped.
 *
 * Ordering: differentiators first, then fewer words, then shorter terms.
 *
 * @returns At most 80 keywords.
 * @throws Error when the reply has no "keywords" array or is not valid JSON.
 */
async function generateKeywords(features: Feature[], content: ScrapedContent, competitors: Competitor[]): Promise<Keyword[]> {
  const systemPrompt = `Generate search-ready phrases users would actually type.`
  const featuresText = features.map((f) => f.name).join(', ')
  // Falls back to a fixed list when no usable competitor name is available.
  const competitorNames =
    competitors
      .map((c) => c.name)
      .filter((n) => n.length > 1)
      .join(', ') || 'Jira, Asana, Monday, Trello'

  const prompt = [
    `Generate 60-80 search phrases for: ${content.title}`,
    `Features: ${featuresText}`,
    `Competitors: ${competitorNames}`,
    `CRITICAL - Follow this priority:`,
    `1. 60% 2-4 word phrases (e.g., "client onboarding checklist", "bug triage workflow")`,
    `2. 25% differentiation phrases (e.g., "asana alternative", "faster than jira")`,
    `3. 15% single-word brand terms only (product/competitor names)`,
    `Return JSON: {"keywords": [{"term": "phrase", "type": "differentiator|product|feature|problem|solution|competitor", "searchVolume": "high|medium|low", "intent": "informational|navigational|transactional", "funnel": "awareness|consideration|decision", "emotionalIntensity": "frustrated|curious|ready"}]}`,
    `Generate 20+ differentiator phrases comparing to: ${competitorNames}`,
  ].join('\n')

  const result = await aiGenerate<{ keywords: Keyword[] }>(prompt, systemPrompt, 0.4)

  const normalized = requireArray<Keyword>(result?.keywords, 'keywords')
    // The reply is unvalidated JSON; skip entries whose term is not a string.
    .filter((keyword) => typeof keyword?.term === 'string')
    .map((keyword) => ({ ...keyword, term: keyword.term.trim() }))
    .filter((keyword) => keyword.term.length > 2)
    .filter((keyword) => {
      const words = countWords(keyword.term)
      if (words === 1) {
        return (
          keyword.type === 'product' ||
          keyword.type === 'competitor' ||
          keyword.type === 'differentiator'
        )
      }
      return words <= 4
    })
    .filter((keyword) => !GENERIC_TERMS.has(keyword.term.toLowerCase()))

  // Sort: differentiators first, then by word count
  return normalized
    .sort((a, b) => {
      const aDiff = a.type === 'differentiator' ? 0 : 1
      const bDiff = b.type === 'differentiator' ? 0 : 1
      if (aDiff !== bDiff) return aDiff - bDiff

      const aWords = countWords(a.term)
      const bWords = countWords(b.term)
      if (aWords !== bWords) return aWords - bWords

      return a.term.length - b.term.length
    })
    .slice(0, 80)
}

/**
 * Asks the model which problems the given features solve.
 *
 * The prompt includes the feature names and the first 3000 characters of the
 * page's raw text.
 *
 * @throws Error when the reply has no "problems" array or is not valid JSON.
 */
async function identifyProblems(features: Feature[], content: ScrapedContent): Promise<Problem[]> {
  const systemPrompt = `Identify problems using JTBD framework.`

  const prompt = [
    `Identify 8-12 problems solved by: ${features.map((f) => f.name).join(', ')}`,
    `Content: ${content.rawText.slice(0, 3000)}`,
    `Return JSON: {"problems": [{"problem": "...", "severity": "high|medium|low", "currentWorkarounds": ["..."], "emotionalImpact": "...", "searchTerms": ["..."]}]}`,
  ].join('\n')

  const result = await aiGenerate<{ problems: Problem[] }>(prompt, systemPrompt, 0.4)
  return requireArray<Problem>(result?.problems, 'problems')
}

/**
 * Asks the model for user personas, seeded with the page title, meta
 * description and the first five problem statements.
 *
 * @throws Error when the reply has no "personas" array or is not valid JSON.
 */
async function generatePersonas(content: ScrapedContent, problems: Problem[]): Promise<Persona[]> {
  const systemPrompt = `Create diverse user personas with search behavior.`

  const prompt = [
    `Create 4-5 personas for: ${content.title}`,
    `Description: ${content.metaDescription}`,
    `Problems: ${problems.map((p) => p.problem).slice(0, 5).join(', ')}`,
    `Return JSON: {"personas": [{"name": "Descriptive name", "role": "Job title", "companySize": "e.g. 10-50 employees", "industry": "...", "painPoints": ["..."], "goals": ["..."], "techSavvy": "low|medium|high", "objections": ["..."], "searchBehavior": ["..."]}]}`,
  ].join('\n')

  const result = await aiGenerate<{ personas: Persona[] }>(prompt, systemPrompt, 0.5)
  return requireArray<Persona>(result?.personas, 'personas')
}

/**
 * Asks the model for use-case scenarios, seeded with the first five feature
 * names and the first three problem statements.
 *
 * @param personas - Accepted for signature compatibility; not used in the prompt.
 * @throws Error when the reply has no "useCases" array or is not valid JSON.
 */
async function generateUseCases(features: Feature[], personas: Persona[], problems: Problem[]): Promise<UseCase[]> {
  const systemPrompt = `Create JTBD use case scenarios.`

  const prompt = [
    `Create 10 use cases.`,
    `Features: ${features.map((f) => f.name).slice(0, 5).join(', ')}`,
    `Problems: ${problems.map((p) => p.problem).slice(0, 3).join(', ')}`,
    `Return JSON: {"useCases": [{"scenario": "...", "trigger": "...", "emotionalState": "...", "currentWorkflow": ["..."], "desiredOutcome": "...", "alternativeProducts": ["..."], "whyThisProduct": "...", "churnRisk": ["..."]}]}`,
  ].join('\n')

  const result = await aiGenerate<{ useCases: UseCase[] }>(prompt, systemPrompt, 0.5)
  return requireArray<UseCase>(result?.useCases, 'useCases')
}

/**
 * Builds site-restricted search queries from the analysis results.
 *
 * Emitted in this order:
 *  - two Reddit queries per competitor (alternative, comparison);
 *  - three Reddit queries for each of the first 10 keywords;
 *  - one Reddit query for each of the first 5 problems;
 *  - one Hacker News query for each of the first 8 keywords;
 *  - one Indie Hackers query for each of the first 6 keywords.
 *
 * Terms are inserted verbatim apart from removal of embedded double quotes.
 *
 * @param useCases - Accepted for signature compatibility; not used.
 */
function generateDorkQueries(keywords: Keyword[], problems: Problem[], useCases: UseCase[], competitors: Competitor[]): DorkQuery[] {
  const queries: DorkQuery[] = []
  const topKeywords = keywords.slice(0, 20).map((k) => k.term)
  const topProblems = problems.slice(0, 5).map((p) => p.problem)
  const competitorNames = competitors.map((c) => c.name).filter((n) => n.length > 1)

  // Differentiation queries (HIGH PRIORITY)
  for (const comp of competitorNames) {
    queries.push({
      query: `site:reddit.com ${quoted(comp)} ("alternative" OR "switching from" OR "moving away from")`,
      platform: 'reddit',
      intent: 'alternative',
      priority: 'high',
    })
    queries.push({
      query: `site:reddit.com ${quoted(comp)} ("better than" OR "vs" OR "versus" OR "compared to")`,
      platform: 'reddit',
      intent: 'comparison',
      priority: 'high',
    })
  }

  // Keyword-based queries. The query is assembled directly instead of via
  // String.replace, which would interpret "$&"-style sequences in the term.
  const redditIntents = [
    { operators: '("looking for" OR "recommendation")', intent: 'looking-for' as const },
    { operators: '("frustrated" OR "hate" OR "sucks")', intent: 'frustrated' as const },
    { operators: '("tired of" OR "fed up")', intent: 'frustrated' as const },
  ]
  for (const term of topKeywords.slice(0, 10)) {
    for (const { operators, intent } of redditIntents) {
      queries.push({
        query: `site:reddit.com ${quoted(term)} ${operators}`,
        platform: 'reddit',
        intent,
        priority: intent === 'frustrated' ? 'high' : 'medium',
      })
    }
  }

  // Problem-based queries
  for (const problem of topProblems) {
    queries.push({
      query: `site:reddit.com ${quoted(problem)} ("how to" OR "solution")`,
      platform: 'reddit',
      intent: 'problem-solving',
      priority: 'high',
    })
  }

  // Hacker News
  for (const term of topKeywords.slice(0, 8)) {
    queries.push({
      query: `site:news.ycombinator.com "Ask HN" ${quoted(term)}`,
      platform: 'hackernews',
      intent: 'looking-for',
      priority: 'high',
    })
  }

  // Indie Hackers
  for (const term of topKeywords.slice(0, 6)) {
    queries.push({
      query: `site:indiehackers.com ${quoted(term)} ("looking for" OR "need")`,
      platform: 'indiehackers',
      intent: 'looking-for',
      priority: 'medium',
    })
  }

  return queries
}

/** Progress notification emitted before and after each analysis pass. */
type AnalysisProgressUpdate = {
  key: 'features' | 'competitors' | 'keywords' | 'problems' | 'useCases' | 'dorkQueries'
  status: 'running' | 'completed'
  detail?: string
}

/**
 * Runs one analysis pass: logs the banner, reports "running", performs the
 * work, then logs and reports "completed" with the same summary text.
 */
async function runStage<T>(
  key: AnalysisProgressUpdate['key'],
  banner: string,
  onProgress: ((update: AnalysisProgressUpdate) => void | Promise<void>) | undefined,
  work: () => T | Promise<T>,
  summarize: (result: T) => string
): Promise<T> {
  console.log(banner)
  await onProgress?.({ key, status: 'running' })

  const result = await work()

  const detail = summarize(result)
  console.log(` ✓ ${detail}`)
  await onProgress?.({ key, status: 'completed', detail })
  return result
}

/**
 * Runs the full multi-pass analysis of a scraped page.
 *
 * Passes run sequentially: features, competitors, keywords, problems (which
 * also generates personas), use cases, dork queries. Each pass is logged to
 * the console and reported through `onProgress`, which is awaited.
 *
 * The product name is the text of the title before the first `|`, `-`, en/em
 * dash or `:`; the tagline is the meta description up to the first `.`.
 * `category` and `positioning` are returned as empty strings.
 *
 * @param content - Scraped page content to analyse.
 * @param onProgress - Optional callback invoked at the start and end of each pass.
 * @throws Any error raised by a pass; passes are not retried.
 */
export async function performDeepAnalysis(
  content: ScrapedContent,
  onProgress?: (update: AnalysisProgressUpdate) => void | Promise<void>
): Promise<EnhancedProductAnalysis> {
  console.log('🔍 Starting deep analysis...')

  const features = await runStage(
    'features',
    ' 📦 Pass 1: Features...',
    onProgress,
    () => extractFeatures(content),
    (found) => `${found.length} features`
  )

  const competitors = await runStage(
    'competitors',
    ' 🏆 Pass 2: Competitors...',
    onProgress,
    () => identifyCompetitors(content),
    (found) => `${found.length} competitors: ${found.map((c) => c.name).join(', ')}`
  )

  const keywords = await runStage(
    'keywords',
    ' 🔑 Pass 3: Keywords...',
    onProgress,
    () => generateKeywords(features, content, competitors),
    (found) =>
      `${found.length} keywords (${found.filter((k) => k.type === 'differentiator').length} differentiators)`
  )

  // Personas are generated inside the "problems" pass and reported with it.
  const { problems, personas } = await runStage(
    'problems',
    ' 🎯 Pass 4: Problems...',
    onProgress,
    async () => {
      const foundProblems = await identifyProblems(features, content)
      const foundPersonas = await generatePersonas(content, foundProblems)
      return { problems: foundProblems, personas: foundPersonas }
    },
    (found) => `${found.problems.length} problems, ${found.personas.length} personas`
  )

  const useCases = await runStage(
    'useCases',
    ' 💡 Pass 5: Use cases...',
    onProgress,
    () => generateUseCases(features, personas, problems),
    (found) => `${found.length} use cases`
  )

  const dorkQueries = await runStage(
    'dorkQueries',
    ' 🔎 Pass 6: Dork queries...',
    onProgress,
    () => generateDorkQueries(keywords, problems, useCases, competitors),
    (found) => `${found.length} queries`
  )

  // NOTE(review): the separator set includes a bare hyphen, so a hyphenated
  // name such as "E-Commerce Suite" is cut at the hyphen. If the title starts
  // with a separator, the whole trimmed title is used instead of an empty name.
  const productName = (content.title.split(/[\|\-–—:]/)[0] ?? '').trim() || content.title.trim()
  const tagline = content.metaDescription.split('.')[0] ?? ''

  return {
    productName,
    tagline,
    description: content.metaDescription,
    category: '',
    positioning: '',
    features,
    problemsSolved: problems,
    personas,
    keywords,
    useCases,
    competitors,
    dorkQueries,
    scrapedAt: new Date().toISOString(),
    analysisVersion: '2.1-optimized',
  }
}