import OpenAI from 'openai'
import type {
  ScrapedContent,
  EnhancedProductAnalysis,
  Feature,
  Problem,
  Persona,
  Keyword,
  UseCase,
  Competitor,
  DorkQuery,
} from './types'

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

/** Evidence-backed summary of the product, as returned by the profiling prompt. */
type ProductProfile = {
  productName: string
  category: string
  primaryJTBD: string
  targetPersona: string
  scopeBoundary: string
  nonGoals: string[]
  differentiators: string[]
  evidence: { claim: string; snippet: string }[]
  confidence: number
}

/** A competitor proposed by the first (candidate generation) pass. */
type CompetitorCandidate = {
  name: string
  type: 'direct' | 'adjacent' | 'generic'
  rationale: string
  confidence: number
}

/**
 * Returns `value` when it is a real array, otherwise an empty array.
 * Model output is only asserted to match our types, so an expected key may be
 * missing or have the wrong shape at runtime.
 */
function asArray<T>(value: readonly T[] | null | undefined): T[] {
  return Array.isArray(value) ? [...value] : []
}

/** Removes double quotes so a term can be embedded in a `"exact phrase"` search operator. */
function stripQuotes(text: string): string {
  return text.replace(/"/g, '').trim()
}

/** Returns `value` unless it is empty or the literal "unknown" sentinel used by the profiling prompt. */
function knownOr(value: string | undefined, fallback: string): string {
  return value && value !== 'unknown' ? value : fallback
}

/** Builds the "<job to be done> for <persona>" positioning line, skipping unknown parts. */
function buildPositioning(profile: ProductProfile): string {
  return [
    knownOr(profile.primaryJTBD, ''),
    profile.targetPersona && profile.targetPersona !== 'unknown' ? `for ${profile.targetPersona}` : '',
  ]
    .filter(Boolean)
    .join(' ')
}

/** Formats the profile fields shared by both competitor prompts. */
function formatProfileSummary(profile: ProductProfile): string {
  return `Product profile:
Name: ${profile.productName}
Category: ${profile.category}
JTBD: ${profile.primaryJTBD}
Target persona: ${profile.targetPersona}
Scope boundary: ${profile.scopeBoundary}
Non-goals: ${profile.nonGoals.join(', ') || 'unknown'}
Differentiators: ${profile.differentiators.join(', ') || 'unknown'}`
}

/**
 * Sends a system + user prompt to the chat completion API and parses the reply as JSON.
 *
 * The JSON is taken from a fenced code block if present, otherwise from the
 * outermost `{ ... }` span, otherwise from the whole reply.
 *
 * @param prompt - User message.
 * @param systemPrompt - System message.
 * @param temperature - Sampling temperature (default 0.3).
 * @returns The parsed JSON, asserted (not validated) to be `T`.
 * @throws Error('Invalid JSON response from AI') when the reply cannot be parsed.
 */
async function aiGenerate<T>(prompt: string, systemPrompt: string, temperature: number = 0.3): Promise<T> {
  const response = await openai.chat.completions.create({
    model: 'gpt-4o-mini',
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: prompt },
    ],
    temperature,
    max_tokens: 4000,
  })

  // An empty `choices` array must not crash; fall back to an empty object.
  const content = response.choices[0]?.message?.content || '{}'

  const codeBlockMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/)
  const jsonMatch = content.match(/(\{[\s\S]*\})/)

  let jsonStr: string
  if (codeBlockMatch && codeBlockMatch[1]) jsonStr = codeBlockMatch[1].trim()
  else if (jsonMatch && jsonMatch[1]) jsonStr = jsonMatch[1].trim()
  else jsonStr = content.trim()

  try {
    return JSON.parse(jsonStr) as T
  } catch {
    console.error('Failed to parse JSON:', jsonStr.substring(0, 200))
    throw new Error('Invalid JSON response from AI')
  }
}

/** Concatenates the scraped title, description, headings, feature lists and paragraphs into one evidence text. */
function buildEvidenceContext(content: ScrapedContent) {
  return [
    `Title: ${content.title}`,
    `Description: ${content.metaDescription}`,
    `Headings: ${content.headings.slice(0, 20).join('\n')}`,
    `Feature Lists: ${content.featureList.slice(0, 20).join('\n')}`,
    `Paragraphs: ${content.paragraphs.slice(0, 12).join('\n\n')}`,
  ].join('\n\n')
}

/**
 * Asks the model for an evidence-only product profile.
 * List fields are capped at 6 entries; a non-numeric confidence becomes 0.3.
 */
async function extractProductProfile(content: ScrapedContent, extraPrompt?: string): Promise<ProductProfile> {
  const systemPrompt = `You are a strict product analyst. Only use provided evidence. If uncertain, answer "unknown" and lower confidence. Return JSON only.`
  const prompt = `Analyze the product using evidence only.
${buildEvidenceContext(content)}

Return JSON:
{
  "productName": "...",
  "category": "...",
  "primaryJTBD": "...",
  "targetPersona": "...",
  "scopeBoundary": "...",
  "nonGoals": ["..."],
  "differentiators": ["..."],
  "evidence": [{"claim": "...", "snippet": "..."}],
  "confidence": 0.0
}

Rules:
- "category" should be a concrete market category, not "software/tool/platform".
- "scopeBoundary" must state what the product does NOT do.
- "nonGoals" should be explicit exclusions inferred from evidence.
- "evidence.snippet" must quote or paraphrase short evidence from the text above.
${extraPrompt ? `\nUser guidance: ${extraPrompt}` : ''}`

  const result = await aiGenerate<ProductProfile>(prompt, systemPrompt, 0.2)
  return {
    ...result,
    nonGoals: asArray(result.nonGoals).slice(0, 6),
    differentiators: asArray(result.differentiators).slice(0, 6),
    evidence: asArray(result.evidence).slice(0, 6),
    confidence: typeof result.confidence === 'number' ? result.confidence : 0.3,
  }
}

/** Extracts up to 20 features from the scraped content. */
async function extractFeatures(content: ScrapedContent, extraPrompt?: string): Promise<Feature[]> {
  const systemPrompt = `Extract EVERY feature from website content. Be exhaustive.`
  const prompt = `Extract features from:
Title: ${content.title}
Description: ${content.metaDescription}
Headings: ${content.headings.slice(0, 15).join('\n')}
Paragraphs: ${content.paragraphs.slice(0, 10).join('\n\n')}
Feature Lists: ${content.featureList.slice(0, 15).join('\n')}

Return JSON: {"features": [{"name": "...", "description": "...", "benefits": ["..."], "useCases": ["..."]}]}
Aim for 10-15 features.
${extraPrompt ? `User guidance: ${extraPrompt}` : ''}`

  const result = await aiGenerate<{ features: Feature[] }>(prompt, systemPrompt, 0.4)
  return asArray(result.features).slice(0, 20)
}

/** First competitor pass: proposes candidates classified as direct, adjacent or generic. */
async function generateCompetitorCandidates(
  profile: ProductProfile,
  extraPrompt?: string
): Promise<{ candidates: CompetitorCandidate[] }> {
  const systemPrompt = `Generate candidate competitors based only on the product profile. Return JSON only.`
  const prompt = `${formatProfileSummary(profile)}
Evidence (for context):
${profile.evidence.map(e => `- ${e.claim}: ${e.snippet}`).join('\n')}

Rules:
- Output real product/company names only.
- Classify as "direct" if same JTBD + same persona + same category.
- "adjacent" if overlap partially.
- "generic" for broad tools people misuse for this (only include if evidence suggests).
- Avoid broad suites unless the category is that suite.
${extraPrompt ? `User guidance: ${extraPrompt}\n` : ''}
Return JSON:
{
  "candidates": [
    { "name": "Product", "type": "direct|adjacent|generic", "rationale": "...", "confidence": 0.0 }
  ]
}`

  const result = await aiGenerate<{ candidates: CompetitorCandidate[] }>(prompt, systemPrompt, 0.2)
  // Callers iterate over `candidates`, so always hand back an array.
  return { ...result, candidates: asArray(result.candidates) }
}

/**
 * Second competitor pass: asks the model to keep only direct competitors,
 * then cleans placeholder names ("Competitor A", "Generic ...") and drops
 * entries whose name is missing or a single character.
 */
async function selectDirectCompetitors(
  profile: ProductProfile,
  candidates: CompetitorCandidate[],
  extraPrompt?: string
): Promise<Competitor[]> {
  const systemPrompt = `You are a strict verifier. Only accept direct competitors. Return JSON only.`
  const prompt = `${formatProfileSummary(profile)}

Candidates:
${candidates.map(c => `- ${c.name} (${c.type}) : ${c.rationale}`).join('\n')}

Rules:
- Only keep DIRECT competitors (same JTBD + persona + category).
- Reject "generic" tools unless the category itself is generic.
- Provide 3-6 competitors. If fewer, include the closest adjacent but label as direct only if truly overlapping.
${extraPrompt ? `User guidance: ${extraPrompt}\n` : ''}
Return JSON:
{
  "competitors": [
    {
      "name": "Asana",
      "differentiator": "Why this product is better",
      "theirStrength": "What they do well",
      "switchTrigger": "Why users switch",
      "theirWeakness": "Their main weakness"
    }
  ]
}`

  const result = await aiGenerate<{ competitors: Competitor[] }>(prompt, systemPrompt, 0.2)
  return asArray(result.competitors)
    // A competitor without a string name cannot be cleaned or used downstream.
    .filter(c => typeof c?.name === 'string')
    .map(c => ({
      ...c,
      name: c.name.replace(/^Competitor\s+[A-Z]$/i, 'Alternative Solution').replace(/^Generic\s+/i, ''),
    }))
    .filter(c => c.name.length > 1)
}

/**
 * Generates search phrases, then filters and ranks them.
 *
 * Kept: terms longer than 2 characters, at most 4 words, single words only
 * for product/competitor/differentiator types, and nothing that is just a
 * generic stop term. Sorted with differentiators first, then fewer words,
 * then shorter text. At most 80 are returned.
 */
async function generateKeywords(
  features: Feature[],
  content: ScrapedContent,
  competitors: Competitor[],
  extraPrompt?: string
): Promise<Keyword[]> {
  const systemPrompt = `Generate search-ready phrases users would actually type.`
  const featuresText = features.map(f => f.name).join(', ')
  const competitorNames = competitors
    .map(c => c.name)
    .filter(n => typeof n === 'string' && n.length > 1)
    .join(', ')
  const differentiatorGuidance = competitorNames
    ? `Generate 20+ differentiator phrases comparing to: ${competitorNames}`
    : `If no competitors are provided, do not invent them. Reduce differentiator share to 5% using generic phrases like "alternatives to X category".`

  const prompt = `Generate 60-80 search phrases for: ${content.title}
Features: ${featuresText}
Competitors: ${competitorNames || 'None'}

CRITICAL - Follow this priority:
1. 60% 2-4 word phrases (e.g., "client onboarding checklist", "bug triage workflow")
2. 25% differentiation phrases (e.g., "asana alternative", "faster than jira")
3. 15% single-word brand terms only (product/competitor names)

Return JSON: {"keywords": [{"term": "phrase", "type": "differentiator|product|feature|problem|solution|competitor", "searchVolume": "high|medium|low", "intent": "informational|navigational|transactional", "funnel": "awareness|consideration|decision", "emotionalIntensity": "frustrated|curious|ready"}]}

${differentiatorGuidance}
${extraPrompt ? `User guidance: ${extraPrompt}` : ''}`

  const result = await aiGenerate<{ keywords: Keyword[] }>(prompt, systemPrompt, 0.4)

  const stopTerms = new Set([
    'platform', 'solution', 'tool', 'software', 'app', 'system', 'product', 'service',
  ])

  const normalized = asArray(result.keywords)
    // Skip malformed entries instead of throwing on `.trim()`.
    .filter((keyword) => typeof keyword?.term === 'string')
    .map((keyword) => ({ ...keyword, term: keyword.term.trim() }))
    .filter((keyword) => keyword.term.length > 2)
    .filter((keyword) => {
      const words = keyword.term.split(/\s+/).filter(Boolean)
      if (words.length === 1) {
        return keyword.type === 'product' || keyword.type === 'competitor' || keyword.type === 'differentiator'
      }
      return words.length <= 4
    })
    .filter((keyword) => !stopTerms.has(keyword.term.toLowerCase()))

  // Sort: differentiators first, then by word count
  return normalized
    .sort((a, b) => {
      const aDiff = a.type === 'differentiator' ? 0 : 1
      const bDiff = b.type === 'differentiator' ? 0 : 1
      if (aDiff !== bDiff) return aDiff - bDiff
      const aWords = a.term.split(/\s+/).length
      const bWords = b.term.split(/\s+/).length
      if (aWords !== bWords) return aWords - bWords
      return a.term.length - b.term.length
    })
    .slice(0, 80)
}

/** Asks the model for the problems the given features solve, using the first 3000 characters of raw text as context. */
async function identifyProblems(
  features: Feature[],
  content: ScrapedContent,
  extraPrompt?: string
): Promise<Problem[]> {
  const systemPrompt = `Identify problems using JTBD framework.`
  const prompt = `Identify 8-12 problems solved by: ${features.map(f => f.name).join(', ')}
Content: ${content.rawText.slice(0, 3000)}

Return JSON: {"problems": [{"problem": "...", "severity": "high|medium|low", "currentWorkarounds": ["..."], "emotionalImpact": "...", "searchTerms": ["..."]}]}
${extraPrompt ? `User guidance: ${extraPrompt}` : ''}`

  const result = await aiGenerate<{ problems: Problem[] }>(prompt, systemPrompt, 0.4)
  return asArray(result.problems)
}

/** Asks the model for user personas, seeded with the first five problems. */
async function generatePersonas(
  content: ScrapedContent,
  problems: Problem[],
  extraPrompt?: string
): Promise<Persona[]> {
  const systemPrompt = `Create diverse user personas with search behavior.`
  const prompt = `Create 4-5 personas for: ${content.title}
Description: ${content.metaDescription}
Problems: ${problems.map(p => p.problem).slice(0, 5).join(', ')}

Return JSON: {"personas": [{"name": "Descriptive name", "role": "Job title", "companySize": "e.g. 10-50 employees", "industry": "...", "painPoints": ["..."], "goals": ["..."], "techSavvy": "low|medium|high", "objections": ["..."], "searchBehavior": ["..."]}]}
${extraPrompt ? `User guidance: ${extraPrompt}` : ''}`

  const result = await aiGenerate<{ personas: Persona[] }>(prompt, systemPrompt, 0.5)
  return asArray(result.personas)
}

/**
 * Asks the model for use-case scenarios from the first five features and
 * first three problems. `personas` is accepted for signature compatibility
 * but is not currently included in the prompt.
 */
async function generateUseCases(
  features: Feature[],
  personas: Persona[],
  problems: Problem[],
  extraPrompt?: string
): Promise<UseCase[]> {
  const systemPrompt = `Create JTBD use case scenarios.`
  const prompt = `Create 10 use cases.
Features: ${features.map(f => f.name).slice(0, 5).join(', ')}
Problems: ${problems.map(p => p.problem).slice(0, 3).join(', ')}

Return JSON: {"useCases": [{"scenario": "...", "trigger": "...", "emotionalState": "...", "currentWorkflow": ["..."], "desiredOutcome": "...", "alternativeProducts": ["..."], "whyThisProduct": "...", "churnRisk": ["..."]}]}
${extraPrompt ? `User guidance: ${extraPrompt}` : ''}`

  const result = await aiGenerate<{ useCases: UseCase[] }>(prompt, systemPrompt, 0.5)
  return asArray(result.useCases)
}

/**
 * Builds search-engine "dork" queries deterministically (no model call) from
 * competitors, the top keywords and the top problems, targeting Reddit,
 * Hacker News and Indie Hackers. `useCases` is accepted but not used.
 */
function generateDorkQueries(
  keywords: Keyword[],
  problems: Problem[],
  useCases: UseCase[],
  competitors: Competitor[]
): DorkQuery[] {
  const queries: DorkQuery[] = []

  // Every value below is wrapped in "..." inside a query, so embedded double
  // quotes would terminate the exact-phrase operator early; strip them.
  const topKeywords = keywords.slice(0, 20).map(k => stripQuotes(k.term)).filter(Boolean)
  const topProblems = problems.slice(0, 5).map(p => stripQuotes(p.problem)).filter(Boolean)
  const competitorNames = competitors.map(c => stripQuotes(c.name)).filter(n => n.length > 1)

  // Differentiation queries (HIGH PRIORITY)
  competitorNames.forEach(comp => {
    queries.push({
      query: `site:reddit.com "${comp}" ("alternative" OR "switching from" OR "moving away from")`,
      platform: 'reddit',
      intent: 'alternative',
      priority: 'high',
    })
    queries.push({
      query: `site:reddit.com "${comp}" ("better than" OR "vs" OR "versus" OR "compared to")`,
      platform: 'reddit',
      intent: 'comparison',
      priority: 'high',
    })
  })

  // Keyword-based queries
  const redditIntents = [
    { template: 'site:reddit.com "{term}" ("looking for" OR "recommendation")', intent: 'looking-for' as const },
    { template: 'site:reddit.com "{term}" ("frustrated" OR "hate" OR "sucks")', intent: 'frustrated' as const },
    { template: 'site:reddit.com "{term}" ("tired of" OR "fed up")', intent: 'frustrated' as const },
  ]
  topKeywords.slice(0, 10).forEach(term => {
    redditIntents.forEach(({ template, intent }) => {
      queries.push({
        // Replacer function: a plain string replacement would expand `$&`, `$'` etc. found in the term.
        query: template.replace('{term}', () => term),
        platform: 'reddit',
        intent,
        priority: intent === 'frustrated' ? 'high' : 'medium',
      })
    })
  })

  // Problem-based queries
  topProblems.forEach(problem => {
    queries.push({
      query: `site:reddit.com "${problem}" ("how to" OR "solution")`,
      platform: 'reddit',
      intent: 'problem-solving',
      priority: 'high',
    })
  })

  // Hacker News
  topKeywords.slice(0, 8).forEach(term => {
    queries.push({
      query: `site:news.ycombinator.com "Ask HN" "${term}"`,
      platform: 'hackernews',
      intent: 'looking-for',
      priority: 'high',
    })
  })

  // Indie Hackers
  topKeywords.slice(0, 6).forEach(term => {
    queries.push({
      query: `site:indiehackers.com "${term}" ("looking for" OR "need")`,
      platform: 'indiehackers',
      intent: 'looking-for',
      priority: 'medium',
    })
  })

  return queries
}

/** Model-driven alternative to `generateDorkQueries`, seeded from an existing analysis. */
async function generateDorkQueriesWithAI(
  analysis: EnhancedProductAnalysis,
  extraPrompt?: string
): Promise<DorkQuery[]> {
  const systemPrompt = `Generate high-signal search queries for forums. Return JSON only.`
  const prompt = `Create 40-60 dork queries.
Product: ${analysis.productName}
Category: ${analysis.category}
Positioning: ${analysis.positioning}
Keywords: ${analysis.keywords.map(k => k.term).slice(0, 25).join(', ')}
Problems: ${analysis.problemsSolved.map(p => p.problem).slice(0, 10).join(', ')}
Competitors: ${analysis.competitors.map(c => c.name).slice(0, 10).join(', ')}
Use cases: ${analysis.useCases.map(u => u.scenario).slice(0, 8).join(', ')}

Rules:
- Use these platforms only: reddit, hackernews, indiehackers, quora, stackoverflow, twitter.
- Include intent: looking-for, frustrated, alternative, comparison, problem-solving, tutorial.
- Prefer query patterns like site:reddit.com "phrase" ...

Return JSON: {"dorkQueries": [{"query": "...", "platform": "reddit|hackernews|indiehackers|twitter|quora|stackoverflow", "intent": "looking-for|frustrated|alternative|comparison|problem-solving|tutorial", "priority": "high|medium|low"}]}
${extraPrompt ? `User guidance: ${extraPrompt}` : ''}`

  const result = await aiGenerate<{ dorkQueries: DorkQuery[] }>(prompt, systemPrompt, 0.3)
  return asArray(result.dorkQueries)
}

/** Progress notification emitted by `performDeepAnalysis` before and after each pass. */
type AnalysisProgressUpdate = {
  key: 'features' | 'competitors' | 'keywords' | 'problems' | 'useCases' | 'dorkQueries'
  status: 'running' | 'completed'
  detail?: string
}

/**
 * Regenerates a single section of an existing analysis, optionally steered by
 * free-form user guidance.
 *
 * Prerequisites that are missing from `analysis` (features, problems,
 * personas) are regenerated without the guidance; the guidance is applied
 * only to the requested section.
 *
 * @param sectionKey - Which section to regenerate.
 * @param content - Scraped page content used as evidence.
 * @param analysis - Current analysis; supplies prerequisites and fallbacks.
 * @param extraPrompt - Optional user guidance appended to the prompt.
 * @returns For "profile", an object of profile-level fields; for every other
 *   key, the regenerated array for that section.
 * @throws Error when `sectionKey` is not a supported section.
 */
export async function repromptSection(
  sectionKey: 'profile' | 'features' | 'competitors' | 'keywords' | 'problems' | 'personas' | 'useCases' | 'dorkQueries',
  content: ScrapedContent,
  analysis: EnhancedProductAnalysis,
  extraPrompt?: string
  // NOTE(review): the branches return different shapes and the intended declared
  // type could not be confirmed; `any` keeps existing callers compiling. Tighten
  // to a per-section union or overloads once callers are checked.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
): Promise<any> {
  // Reuse sections already present on the analysis; regenerate only what is missing.
  const resolveFeatures = async (): Promise<Feature[]> =>
    analysis.features?.length ? analysis.features : extractFeatures(content)
  const resolveProblems = async (features: Feature[]): Promise<Problem[]> =>
    analysis.problemsSolved?.length ? analysis.problemsSolved : identifyProblems(features, content)

  switch (sectionKey) {
    case 'profile': {
      const profile = await extractProductProfile(content, extraPrompt)
      return {
        productName: knownOr(profile.productName, analysis.productName),
        tagline: content.metaDescription.split('.')[0] ?? '',
        description: content.metaDescription,
        category: knownOr(profile.category, analysis.category),
        positioning: buildPositioning(profile),
        primaryJTBD: profile.primaryJTBD,
        targetPersona: profile.targetPersona,
        scopeBoundary: profile.scopeBoundary,
        nonGoals: profile.nonGoals,
        differentiators: profile.differentiators,
        evidence: profile.evidence,
        confidence: profile.confidence,
      }
    }
    case 'features':
      return extractFeatures(content, extraPrompt)
    case 'competitors': {
      const profile = await extractProductProfile(content, extraPrompt)
      const candidateSet = await generateCompetitorCandidates(profile, extraPrompt)
      return selectDirectCompetitors(profile, candidateSet.candidates, extraPrompt)
    }
    case 'keywords': {
      const features = await resolveFeatures()
      return generateKeywords(features, content, analysis.competitors || [], extraPrompt)
    }
    case 'problems': {
      const features = await resolveFeatures()
      return identifyProblems(features, content, extraPrompt)
    }
    case 'personas': {
      // Features are only needed (and only fetched) when problems must be regenerated;
      // an empty feature list would yield an empty "problems solved by:" prompt.
      const problems = analysis.problemsSolved?.length
        ? analysis.problemsSolved
        : await identifyProblems(await resolveFeatures(), content)
      return generatePersonas(content, problems, extraPrompt)
    }
    case 'useCases': {
      const features = await resolveFeatures()
      const problems = await resolveProblems(features)
      const personas = analysis.personas?.length ? analysis.personas : await generatePersonas(content, problems)
      return generateUseCases(features, personas, problems, extraPrompt)
    }
    case 'dorkQueries':
      return generateDorkQueriesWithAI(analysis, extraPrompt)
    default:
      throw new Error(`Unsupported section key: ${sectionKey}`)
  }
}

/**
 * Runs the full multi-pass analysis over scraped content.
 *
 * Passes run sequentially: profile, features, competitors, keywords,
 * problems + personas, use cases, dork queries. `onProgress` (if given) is
 * awaited with a "running" and a "completed" update around each pass except
 * profiling; personas are reported under the "problems" key.
 *
 * @param content - Scraped page content.
 * @param onProgress - Optional callback for progress updates; awaited before continuing.
 * @returns The assembled analysis, stamped with the current time and version '2.2-profiled'.
 */
export async function performDeepAnalysis(
  content: ScrapedContent,
  onProgress?: (update: AnalysisProgressUpdate) => void | Promise<void>
): Promise<EnhancedProductAnalysis> {
  console.log('🔍 Starting deep analysis...')

  console.log(' 🧭 Product profiling...')
  const productProfile = await extractProductProfile(content)
  console.log(` ✓ Profiled as ${productProfile.category} for ${productProfile.targetPersona} (conf ${productProfile.confidence})`)

  console.log(' 📦 Pass 1: Features...')
  await onProgress?.({ key: 'features', status: 'running' })
  const features = await extractFeatures(content)
  const featuresDetail = `${features.length} features`
  console.log(` ✓ ${featuresDetail}`)
  await onProgress?.({ key: 'features', status: 'completed', detail: featuresDetail })

  console.log(' 🏆 Pass 2: Competitors...')
  await onProgress?.({ key: 'competitors', status: 'running' })
  const candidateSet = await generateCompetitorCandidates(productProfile)
  const competitors = await selectDirectCompetitors(productProfile, candidateSet.candidates)
  const competitorsDetail = `${competitors.length} competitors: ${competitors.map(c => c.name).join(', ')}`
  console.log(` ✓ ${competitorsDetail}`)
  await onProgress?.({ key: 'competitors', status: 'completed', detail: competitorsDetail })

  console.log(' 🔑 Pass 3: Keywords...')
  await onProgress?.({ key: 'keywords', status: 'running' })
  const keywords = await generateKeywords(features, content, competitors)
  const keywordsDetail = `${keywords.length} keywords (${keywords.filter(k => k.type === 'differentiator').length} differentiators)`
  console.log(` ✓ ${keywordsDetail}`)
  await onProgress?.({ key: 'keywords', status: 'completed', detail: keywordsDetail })

  console.log(' 🎯 Pass 4: Problems...')
  await onProgress?.({ key: 'problems', status: 'running' })
  const problems = await identifyProblems(features, content)
  const personas = await generatePersonas(content, problems)
  const problemsDetail = `${problems.length} problems, ${personas.length} personas`
  console.log(` ✓ ${problemsDetail}`)
  await onProgress?.({ key: 'problems', status: 'completed', detail: problemsDetail })

  console.log(' 💡 Pass 5: Use cases...')
  await onProgress?.({ key: 'useCases', status: 'running' })
  const useCases = await generateUseCases(features, personas, problems)
  const useCasesDetail = `${useCases.length} use cases`
  console.log(` ✓ ${useCasesDetail}`)
  await onProgress?.({ key: 'useCases', status: 'completed', detail: useCasesDetail })

  console.log(' 🔎 Pass 6: Dork queries...')
  await onProgress?.({ key: 'dorkQueries', status: 'running' })
  const dorkQueries = generateDorkQueries(keywords, problems, useCases, competitors)
  const dorkDetail = `${dorkQueries.length} queries`
  console.log(` ✓ ${dorkDetail}`)
  await onProgress?.({ key: 'dorkQueries', status: 'completed', detail: dorkDetail })

  // Fallback product name: the page title up to the first separator (| - – — :).
  const productName = (content.title.split(/[\|\-–—:]/)[0] ?? '').trim()
  const tagline = content.metaDescription.split('.')[0] ?? ''

  return {
    productName: knownOr(productProfile.productName, productName),
    tagline,
    description: content.metaDescription,
    category: knownOr(productProfile.category, ''),
    positioning: buildPositioning(productProfile),
    features,
    problemsSolved: problems,
    personas,
    keywords,
    useCases,
    competitors,
    dorkQueries,
    scrapedAt: new Date().toISOString(),
    analysisVersion: '2.2-profiled',
  }
}