import OpenAI from 'openai'
|
|
import type { ProductAnalysis, ScrapedContent, Opportunity } from './types'
|
|
|
|
const openai = new OpenAI({
|
|
apiKey: process.env.OPENAI_API_KEY
|
|
})
|
|
|
|
export async function analyzeProduct(content: ScrapedContent): Promise<ProductAnalysis> {
|
|
const prompt = `Analyze this website content and extract structured product information.
|
|
|
|
Website URL: ${content.url}
|
|
Page Title: ${content.title}
|
|
Meta Description: ${content.metaDescription}
|
|
|
|
Headings Found:
|
|
${content.headings.slice(0, 10).join('\n')}
|
|
|
|
Key Paragraphs:
|
|
${content.paragraphs.slice(0, 8).join('\n\n')}
|
|
|
|
Feature List Items:
|
|
${content.featureList.slice(0, 10).join('\n')}
|
|
|
|
Navigation/Links:
|
|
${content.links.slice(0, 8).join(', ')}
|
|
|
|
Based on this content, provide a comprehensive product analysis in JSON format:
|
|
|
|
{
|
|
"productName": "The exact product name",
|
|
"tagline": "A compelling one-sentence tagline",
|
|
"description": "2-3 sentence description of what the product does",
|
|
"features": ["Feature 1", "Feature 2", "Feature 3"],
|
|
"problemsSolved": ["Problem 1", "Problem 2", "Problem 3"],
|
|
"targetAudience": ["Audience segment 1", "Audience segment 2"],
|
|
"valuePropositions": ["Value prop 1", "Value prop 2", "Value prop 3"],
|
|
"keywords": ["keyword1", "keyword2", "keyword3"]
|
|
}
|
|
|
|
Guidelines:
|
|
- Product name should be the actual brand/product name, not generic
|
|
- Features should be specific capabilities the product offers
|
|
- Problems solved should be pain points customers have before using this
|
|
- Target audience should be specific personas
|
|
- Value propositions should explain WHY someone should care
|
|
- Keywords should be terms people search for when looking for this solution
|
|
|
|
Return ONLY the JSON object, no markdown formatting.`
|
|
|
|
const response = await openai.chat.completions.create({
|
|
model: 'gpt-4o-mini',
|
|
messages: [
|
|
{
|
|
role: 'system',
|
|
content: 'You are a product marketing expert who analyzes websites and extracts structured product information. You are thorough and accurate.'
|
|
},
|
|
{
|
|
role: 'user',
|
|
content: prompt
|
|
}
|
|
],
|
|
temperature: 0.3,
|
|
max_tokens: 1500
|
|
})
|
|
|
|
const content_text = response.choices[0].message.content || '{}'
|
|
|
|
// Extract JSON from potential markdown
|
|
const jsonMatch = content_text.match(/\{[\s\S]*\}/)
|
|
const jsonStr = jsonMatch ? jsonMatch[0] : content_text
|
|
|
|
const analysis = JSON.parse(jsonStr)
|
|
|
|
return {
|
|
productName: analysis.productName || content.title,
|
|
tagline: analysis.tagline || '',
|
|
description: analysis.description || content.metaDescription,
|
|
features: analysis.features || [],
|
|
problemsSolved: analysis.problemsSolved || [],
|
|
targetAudience: analysis.targetAudience || [],
|
|
valuePropositions: analysis.valuePropositions || [],
|
|
keywords: analysis.keywords || [],
|
|
scrapedAt: new Date().toISOString()
|
|
}
|
|
}
|
|
|
|
export async function findOpportunities(analysis: ProductAnalysis): Promise<Opportunity[]> {
|
|
// Generate dork queries based on the analysis
|
|
const keywords = analysis.keywords.slice(0, 5)
|
|
const problems = analysis.problemsSolved.slice(0, 3)
|
|
|
|
const dorkQueries = [
|
|
...keywords.map(k => `site:reddit.com "${k}" ("looking for" OR "need" OR "frustrated" OR "problem")`),
|
|
...keywords.map(k => `site:reddit.com "${k}" ("alternative to" OR "tired of" OR "sucks")`),
|
|
...keywords.map(k => `site:reddit.com "${k}" ("recommendation" OR "what do you use" OR "suggestions")`),
|
|
...problems.map(p => `site:reddit.com "${p}" ("how to" OR "help" OR "solution")`),
|
|
...keywords.map(k => `site:news.ycombinator.com "${k}" ("Ask HN" OR "Show HN")`),
|
|
...keywords.map(k => `site:indiehackers.com "${k}" ("looking for" OR "need")`),
|
|
]
|
|
|
|
const opportunities: Opportunity[] = []
|
|
|
|
// Limit to top queries to avoid rate limits
|
|
for (const query of dorkQueries.slice(0, 6)) {
|
|
try {
|
|
const results = await searchGoogle(query, 8)
|
|
for (const result of results) {
|
|
const opportunity = await analyzeOpportunity(result, analysis)
|
|
if (opportunity.relevanceScore >= 0.5) {
|
|
opportunities.push(opportunity)
|
|
}
|
|
}
|
|
} catch (e) {
|
|
console.error('Search failed:', e)
|
|
}
|
|
}
|
|
|
|
// Sort by relevance and dedupe
|
|
const seen = new Set<string>()
|
|
return opportunities
|
|
.filter(o => {
|
|
if (seen.has(o.url)) return false
|
|
seen.add(o.url)
|
|
return true
|
|
})
|
|
.sort((a, b) => b.relevanceScore - a.relevanceScore)
|
|
.slice(0, 15)
|
|
}
|
|
|
|
async function analyzeOpportunity(result: SearchResult, product: ProductAnalysis): Promise<Opportunity> {
|
|
const prompt = `Rate how relevant this forum post is for the following product.
|
|
|
|
Product: ${product.productName}
|
|
Product Description: ${product.description}
|
|
Product Features: ${product.features.join(', ')}
|
|
Problems Solved: ${product.problemsSolved.join(', ')}
|
|
|
|
Forum Post Title: ${result.title}
|
|
Forum Post Snippet: ${result.snippet}
|
|
Source: ${result.source}
|
|
|
|
Rate on a scale of 0-1 how much this post indicates someone who could benefit from the product.
|
|
Identify specific pain points mentioned.
|
|
Suggest a helpful, non-spammy way to engage.
|
|
|
|
Return JSON:
|
|
{
|
|
"relevanceScore": 0.0-1.0,
|
|
"painPoints": ["pain point 1", "pain point 2"],
|
|
"suggestedApproach": "Suggested message or approach"
|
|
}`
|
|
|
|
try {
|
|
const response = await openai.chat.completions.create({
|
|
model: 'gpt-4o-mini',
|
|
messages: [
|
|
{
|
|
role: 'system',
|
|
content: 'You are a sales researcher. Analyze forum posts for product fit. Be honest about relevance.'
|
|
},
|
|
{
|
|
role: 'user',
|
|
content: prompt
|
|
}
|
|
],
|
|
temperature: 0.3,
|
|
max_tokens: 400
|
|
})
|
|
|
|
const content = response.choices[0].message.content || '{}'
|
|
const jsonMatch = content.match(/\{[\s\S]*\}/)
|
|
const jsonStr = jsonMatch ? jsonMatch[0] : content
|
|
const analysis = JSON.parse(jsonStr)
|
|
|
|
return {
|
|
title: result.title,
|
|
url: result.url,
|
|
source: result.source,
|
|
snippet: result.snippet.slice(0, 300),
|
|
relevanceScore: analysis.relevanceScore || 0,
|
|
painPoints: analysis.painPoints || [],
|
|
suggestedApproach: analysis.suggestedApproach || ''
|
|
}
|
|
} catch (e) {
|
|
// Fallback simple analysis
|
|
const content = (result.title + ' ' + result.snippet).toLowerCase()
|
|
const overlap = product.keywords.filter(k => content.includes(k.toLowerCase())).length
|
|
const relevance = Math.min(overlap / Math.max(product.keywords.length * 0.5, 1), 1)
|
|
|
|
return {
|
|
title: result.title,
|
|
url: result.url,
|
|
source: result.source,
|
|
snippet: result.snippet.slice(0, 300),
|
|
relevanceScore: relevance,
|
|
painPoints: ['Related to product domain'],
|
|
suggestedApproach: 'Share relevant insights about their problem'
|
|
}
|
|
}
|
|
}
|
|
|
|
/** A single organic web-search hit, normalized across search providers. */
interface SearchResult {
  // Result headline as shown on the results page (HTML tags stripped).
  title: string
  // Absolute URL of the result.
  url: string
  // Short text excerpt accompanying the result; may be empty.
  snippet: string
  // Human-readable community label derived from the URL (e.g. 'Reddit').
  source: string
}
async function searchGoogle(query: string, num: number): Promise<SearchResult[]> {
|
|
// Try Serper first
|
|
if (process.env.SERPER_API_KEY) {
|
|
try {
|
|
const results = await searchSerper(query, num)
|
|
if (results.length > 0) return results
|
|
} catch (e) {
|
|
console.error('Serper search failed:', e)
|
|
}
|
|
}
|
|
|
|
// Fallback to direct fetch
|
|
return searchDirect(query, num)
|
|
}
|
|
|
|
async function searchSerper(query: string, num: number): Promise<SearchResult[]> {
|
|
const response = await fetch('https://google.serper.dev/search', {
|
|
method: 'POST',
|
|
headers: {
|
|
'X-API-KEY': process.env.SERPER_API_KEY!,
|
|
'Content-Type': 'application/json'
|
|
},
|
|
body: JSON.stringify({ q: query, num })
|
|
})
|
|
|
|
if (!response.ok) throw new Error('Serper API error')
|
|
|
|
const data = await response.json()
|
|
return (data.organic || []).map((r: any) => ({
|
|
title: r.title,
|
|
url: r.link,
|
|
snippet: r.snippet,
|
|
source: getSource(r.link)
|
|
}))
|
|
}
|
|
|
|
async function searchDirect(query: string, num: number): Promise<SearchResult[]> {
|
|
const encodedQuery = encodeURIComponent(query)
|
|
const url = `https://www.google.com/search?q=${encodedQuery}&num=${num}`
|
|
|
|
const response = await fetch(url, {
|
|
headers: {
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
|
}
|
|
})
|
|
|
|
const html = await response.text()
|
|
|
|
// Simple regex-based parsing to avoid cheerio issues
|
|
const results: SearchResult[] = []
|
|
|
|
// Extract search results using regex
|
|
const resultBlocks = html.match(/<div class="g"[^>]*>([\s\S]*?)<\/div>\s*<\/div>/g) || []
|
|
|
|
for (const block of resultBlocks.slice(0, num)) {
|
|
const titleMatch = block.match(/<h3[^>]*>(.*?)<\/h3>/)
|
|
const linkMatch = block.match(/<a href="([^"]+)"/)
|
|
const snippetMatch = block.match(/<div class="VwiC3b[^"]*"[^>]*>(.*?)<\/div>/)
|
|
|
|
if (titleMatch && linkMatch) {
|
|
const title = titleMatch[1].replace(/<[^>]+>/g, '')
|
|
const link = linkMatch[1]
|
|
const snippet = snippetMatch ? snippetMatch[1].replace(/<[^>]+>/g, '') : ''
|
|
|
|
results.push({
|
|
title,
|
|
url: link,
|
|
snippet,
|
|
source: getSource(link)
|
|
})
|
|
}
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
function getSource(url: string): string {
|
|
if (url.includes('reddit.com')) return 'Reddit'
|
|
if (url.includes('news.ycombinator.com')) return 'Hacker News'
|
|
if (url.includes('indiehackers.com')) return 'Indie Hackers'
|
|
if (url.includes('quora.com')) return 'Quora'
|
|
if (url.includes('twitter.com') || url.includes('x.com')) return 'Twitter/X'
|
|
if (url.includes('stackexchange.com') || url.includes('stackoverflow.com')) return 'Stack Exchange'
|
|
return 'Other'
|
|
}
|