initialised repo
This commit is contained in:
261
lib/analysis-pipeline.ts
Normal file
261
lib/analysis-pipeline.ts
Normal file
@@ -0,0 +1,261 @@
|
||||
import OpenAI from 'openai'
|
||||
import type {
|
||||
ScrapedContent,
|
||||
EnhancedProductAnalysis,
|
||||
Feature,
|
||||
Problem,
|
||||
Persona,
|
||||
Keyword,
|
||||
UseCase,
|
||||
Competitor,
|
||||
DorkQuery
|
||||
} from './types'
|
||||
|
||||
// Module-level OpenAI client shared by every analysis pass.
// NOTE(review): reads OPENAI_API_KEY at import time — the SDK may throw on
// construction if the variable is unset; confirm deployment always sets it.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
})
|
||||
|
||||
async function aiGenerate<T>(prompt: string, systemPrompt: string, temperature: number = 0.3): Promise<T> {
|
||||
const response = await openai.chat.completions.create({
|
||||
model: 'gpt-4o-mini',
|
||||
messages: [{ role: 'system', content: systemPrompt }, { role: 'user', content: prompt }],
|
||||
temperature,
|
||||
max_tokens: 4000
|
||||
})
|
||||
|
||||
const content = response.choices[0].message.content || '{}'
|
||||
const codeBlockMatch = content.match(/```(?:json)?\s*([\s\S]*?)```/)
|
||||
const jsonMatch = content.match(/(\{[\s\S]*\})/)
|
||||
|
||||
let jsonStr: string
|
||||
if (codeBlockMatch && codeBlockMatch[1]) jsonStr = codeBlockMatch[1].trim()
|
||||
else if (jsonMatch && jsonMatch[1]) jsonStr = jsonMatch[1].trim()
|
||||
else jsonStr = content.trim()
|
||||
|
||||
try { return JSON.parse(jsonStr) as T }
|
||||
catch (e) {
|
||||
console.error('Failed to parse JSON:', jsonStr.substring(0, 200))
|
||||
throw new Error('Invalid JSON response from AI')
|
||||
}
|
||||
}
|
||||
|
||||
async function extractFeatures(content: ScrapedContent): Promise<Feature[]> {
|
||||
const systemPrompt = `Extract EVERY feature from website content. Be exhaustive.`
|
||||
const prompt = `Extract features from:
|
||||
Title: ${content.title}
|
||||
Description: ${content.metaDescription}
|
||||
Headings: ${content.headings.slice(0, 15).join('\n')}
|
||||
Paragraphs: ${content.paragraphs.slice(0, 10).join('\n\n')}
|
||||
Feature Lists: ${content.featureList.slice(0, 15).join('\n')}
|
||||
|
||||
Return JSON: {"features": [{"name": "...", "description": "...", "benefits": ["..."], "useCases": ["..."]}]}
|
||||
Aim for 10-15 features.`
|
||||
|
||||
const result = await aiGenerate<{ features: Feature[] }>(prompt, systemPrompt, 0.4)
|
||||
return result.features.slice(0, 20)
|
||||
}
|
||||
|
||||
async function identifyCompetitors(content: ScrapedContent): Promise<Competitor[]> {
|
||||
const systemPrompt = `Identify real, named competitors. Use actual company/product names like "Asana", "Jira", "Monday.com", "Trello", "Notion". Never use generic names like "Competitor A".`
|
||||
|
||||
const prompt = `Identify 5-6 real competitors for: ${content.title}
|
||||
Description: ${content.metaDescription}
|
||||
|
||||
Return EXACT JSON format:
|
||||
{
|
||||
"competitors": [
|
||||
{
|
||||
"name": "Asana",
|
||||
"differentiator": "Why this product is better",
|
||||
"theirStrength": "What they do well",
|
||||
"switchTrigger": "Why users switch",
|
||||
"theirWeakness": "Their main weakness"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Include: Direct competitors (same space), Big players, Popular alternatives, Tools people misuse for this. Use ONLY real product names.`
|
||||
|
||||
const result = await aiGenerate<{ competitors: Competitor[] }>(prompt, systemPrompt, 0.3)
|
||||
|
||||
// Validate competitor names aren't generic
|
||||
return result.competitors.map(c => ({
|
||||
...c,
|
||||
name: c.name.replace(/^Competitor\s+[A-Z]$/i, 'Alternative Solution').replace(/^Generic\s+/i, '')
|
||||
})).filter(c => c.name.length > 1)
|
||||
}
|
||||
|
||||
async function generateKeywords(features: Feature[], content: ScrapedContent, competitors: Competitor[]): Promise<Keyword[]> {
|
||||
const systemPrompt = `Generate SEO keywords. PRIORITY: 1) Single words, 2) Differentiation keywords showing competitive advantage.`
|
||||
|
||||
const featuresText = features.map(f => f.name).join(', ')
|
||||
const competitorNames = competitors.map(c => c.name).filter(n => n.length > 1).join(', ') || 'Jira, Asana, Monday, Trello'
|
||||
|
||||
const prompt = `Generate 60-80 keywords for: ${content.title}
|
||||
Features: ${featuresText}
|
||||
Competitors: ${competitorNames}
|
||||
|
||||
CRITICAL - Follow this priority:
|
||||
1. 40% SINGLE WORDS (e.g., "tracker", "automate", "sync", "fast")
|
||||
2. 30% DIFFERENTIATION keywords (e.g., "vs-jira", "asana-alternative", "faster", "simpler")
|
||||
3. 30% Short 2-word phrases only when needed
|
||||
|
||||
Return JSON: {"keywords": [{"term": "word", "type": "differentiator|product|feature|problem|solution|competitor", "searchVolume": "high|medium|low", "intent": "informational|navigational|transactional", "funnel": "awareness|consideration|decision", "emotionalIntensity": "frustrated|curious|ready"}]}
|
||||
|
||||
Generate 20+ differentiator keywords comparing to: ${competitorNames}`
|
||||
|
||||
const result = await aiGenerate<{ keywords: Keyword[] }>(prompt, systemPrompt, 0.5)
|
||||
|
||||
// Sort: differentiators first, then by word count
|
||||
return result.keywords.sort((a, b) => {
|
||||
const aDiff = a.type === 'differentiator' ? 0 : 1
|
||||
const bDiff = b.type === 'differentiator' ? 0 : 1
|
||||
if (aDiff !== bDiff) return aDiff - bDiff
|
||||
|
||||
const aWords = a.term.split(/\s+/).length
|
||||
const bWords = b.term.split(/\s+/).length
|
||||
if (aWords !== bWords) return aWords - bWords
|
||||
|
||||
return a.term.length - b.term.length
|
||||
}).slice(0, 80)
|
||||
}
|
||||
|
||||
async function identifyProblems(features: Feature[], content: ScrapedContent): Promise<Problem[]> {
|
||||
const systemPrompt = `Identify problems using JTBD framework.`
|
||||
const prompt = `Identify 8-12 problems solved by: ${features.map(f => f.name).join(', ')}
|
||||
Content: ${content.rawText.slice(0, 3000)}
|
||||
|
||||
Return JSON: {"problems": [{"problem": "...", "severity": "high|medium|low", "currentWorkarounds": ["..."], "emotionalImpact": "...", "searchTerms": ["..."]}]}`
|
||||
|
||||
const result = await aiGenerate<{ problems: Problem[] }>(prompt, systemPrompt, 0.4)
|
||||
return result.problems
|
||||
}
|
||||
|
||||
async function generatePersonas(content: ScrapedContent, problems: Problem[]): Promise<Persona[]> {
|
||||
const systemPrompt = `Create diverse user personas with search behavior.`
|
||||
const prompt = `Create 4-5 personas for: ${content.title}
|
||||
Description: ${content.metaDescription}
|
||||
Problems: ${problems.map(p => p.problem).slice(0, 5).join(', ')}
|
||||
|
||||
Return JSON: {"personas": [{"name": "Descriptive name", "role": "Job title", "companySize": "e.g. 10-50 employees", "industry": "...", "painPoints": ["..."], "goals": ["..."], "techSavvy": "low|medium|high", "objections": ["..."], "searchBehavior": ["..."]}]}`
|
||||
|
||||
const result = await aiGenerate<{ personas: Persona[] }>(prompt, systemPrompt, 0.5)
|
||||
return result.personas
|
||||
}
|
||||
|
||||
async function generateUseCases(features: Feature[], personas: Persona[], problems: Problem[]): Promise<UseCase[]> {
|
||||
const systemPrompt = `Create JTBD use case scenarios.`
|
||||
const prompt = `Create 10 use cases.
|
||||
Features: ${features.map(f => f.name).slice(0, 5).join(', ')}
|
||||
Problems: ${problems.map(p => p.problem).slice(0, 3).join(', ')}
|
||||
|
||||
Return JSON: {"useCases": [{"scenario": "...", "trigger": "...", "emotionalState": "...", "currentWorkflow": ["..."], "desiredOutcome": "...", "alternativeProducts": ["..."], "whyThisProduct": "...", "churnRisk": ["..."]}]}`
|
||||
|
||||
const result = await aiGenerate<{ useCases: UseCase[] }>(prompt, systemPrompt, 0.5)
|
||||
return result.useCases
|
||||
}
|
||||
|
||||
function generateDorkQueries(keywords: Keyword[], problems: Problem[], useCases: UseCase[], competitors: Competitor[]): DorkQuery[] {
|
||||
const queries: DorkQuery[] = []
|
||||
|
||||
const topKeywords = keywords.slice(0, 20).map(k => k.term)
|
||||
const topProblems = problems.slice(0, 5).map(p => p.problem)
|
||||
const competitorNames = competitors.map(c => c.name).filter(n => n.length > 1)
|
||||
|
||||
// Differentiation queries (HIGH PRIORITY)
|
||||
competitorNames.forEach(comp => {
|
||||
queries.push({
|
||||
query: `site:reddit.com "${comp}" ("alternative" OR "switching from" OR "moving away from")`,
|
||||
platform: 'reddit',
|
||||
intent: 'alternative',
|
||||
priority: 'high'
|
||||
})
|
||||
queries.push({
|
||||
query: `site:reddit.com "${comp}" ("better than" OR "vs" OR "versus" OR "compared to")`,
|
||||
platform: 'reddit',
|
||||
intent: 'comparison',
|
||||
priority: 'high'
|
||||
})
|
||||
})
|
||||
|
||||
// Keyword-based queries
|
||||
const redditIntents = [
|
||||
{ template: 'site:reddit.com "{term}" ("looking for" OR "recommendation")', intent: 'looking-for' as const },
|
||||
{ template: 'site:reddit.com "{term}" ("frustrated" OR "hate" OR "sucks")', intent: 'frustrated' as const },
|
||||
{ template: 'site:reddit.com "{term}" ("tired of" OR "fed up")', intent: 'frustrated' as const },
|
||||
]
|
||||
|
||||
topKeywords.slice(0, 10).forEach(term => {
|
||||
redditIntents.forEach(({ template, intent }) => {
|
||||
queries.push({ query: template.replace('{term}', term), platform: 'reddit', intent, priority: intent === 'frustrated' ? 'high' : 'medium' })
|
||||
})
|
||||
})
|
||||
|
||||
// Problem-based queries
|
||||
topProblems.forEach(problem => {
|
||||
queries.push({ query: `site:reddit.com "${problem}" ("how to" OR "solution")`, platform: 'reddit', intent: 'problem-solving', priority: 'high' })
|
||||
})
|
||||
|
||||
// Hacker News
|
||||
topKeywords.slice(0, 8).forEach(term => {
|
||||
queries.push({ query: `site:news.ycombinator.com "Ask HN" "${term}"`, platform: 'hackernews', intent: 'looking-for', priority: 'high' })
|
||||
})
|
||||
|
||||
// Indie Hackers
|
||||
topKeywords.slice(0, 6).forEach(term => {
|
||||
queries.push({ query: `site:indiehackers.com "${term}" ("looking for" OR "need")`, platform: 'indiehackers', intent: 'looking-for', priority: 'medium' })
|
||||
})
|
||||
|
||||
return queries
|
||||
}
|
||||
|
||||
/**
 * Orchestrates the six analysis passes over scraped website content and
 * assembles the final EnhancedProductAnalysis payload.
 *
 * Pass order matters: keywords need competitors (pass 2), use cases need
 * problems and personas (pass 4), and dork queries need everything upstream.
 *
 * @param content Scraped site content produced by the scraper stage.
 * @returns Fully populated analysis; `category` and `positioning` are left
 *          empty here — presumably filled in downstream (confirm).
 */
export async function performDeepAnalysis(content: ScrapedContent): Promise<EnhancedProductAnalysis> {
  console.log('🔍 Starting deep analysis...')

  console.log(' 📦 Pass 1: Features...')
  const features = await extractFeatures(content)
  console.log(` ✓ ${features.length} features`)

  console.log(' 🏆 Pass 2: Competitors...')
  const competitors = await identifyCompetitors(content)
  console.log(` ✓ ${competitors.length} competitors: ${competitors.map(c => c.name).join(', ')}`)

  console.log(' 🔑 Pass 3: Keywords...')
  const keywords = await generateKeywords(features, content, competitors)
  console.log(` ✓ ${keywords.length} keywords (${keywords.filter(k => k.type === 'differentiator').length} differentiators)`)

  console.log(' 🎯 Pass 4: Problems...')
  // Problems and personas run concurrently; to allow that, personas are
  // generated WITHOUT the problem list (an empty array is passed) even though
  // generatePersonas accepts one.
  // NOTE(review): looks like a deliberate latency trade-off — confirm intent.
  const [problems, personas] = await Promise.all([
    identifyProblems(features, content),
    generatePersonas(content, [])
  ])
  console.log(` ✓ ${problems.length} problems, ${personas.length} personas`)

  console.log(' 💡 Pass 5: Use cases...')
  const useCases = await generateUseCases(features, personas, problems)
  console.log(` ✓ ${useCases.length} use cases`)

  console.log(' 🔎 Pass 6: Dork queries...')
  const dorkQueries = generateDorkQueries(keywords, problems, useCases, competitors)
  console.log(` ✓ ${dorkQueries.length} queries`)

  // Product name = title text before the first |, -, en/em dash, or colon;
  // tagline = first sentence of the meta description.
  const productName = content.title.split(/[\|\-–—:]/)[0].trim()
  const tagline = content.metaDescription.split('.')[0]

  return {
    productName,
    tagline,
    description: content.metaDescription,
    category: '',      // not computed here — presumably set downstream; verify
    positioning: '',   // not computed here — presumably set downstream; verify
    features,
    problemsSolved: problems,
    personas,
    keywords,
    useCases,
    competitors,
    dorkQueries,
    scrapedAt: new Date().toISOString(),
    analysisVersion: '2.1-optimized'
  }
}
|
||||
Reference in New Issue
Block a user