import OpenAI from 'openai'
|
|
import type { ProductAnalysis, ScrapedContent, Opportunity } from './types'
|
|
|
|
const openai = new OpenAI({
|
|
apiKey: process.env.OPENAI_API_KEY
|
|
})
|
|
|
|
export async function analyzeProduct(content: ScrapedContent): Promise<ProductAnalysis> {
|
|
const prompt = `Analyze this website content and extract structured product information.
|
|
|
|
Website URL: ${content.url}
|
|
Page Title: ${content.title}
|
|
Meta Description: ${content.metaDescription}
|
|
|
|
Headings Found:
|
|
${content.headings.slice(0, 10).join('\n')}
|
|
|
|
Key Paragraphs:
|
|
${content.paragraphs.slice(0, 8).join('\n\n')}
|
|
|
|
Feature List Items:
|
|
${content.featureList.slice(0, 10).join('\n')}
|
|
|
|
Navigation/Links:
|
|
${content.links.slice(0, 8).join(', ')}
|
|
|
|
Based on this content, provide a comprehensive product analysis in JSON format:
|
|
|
|
{
|
|
"productName": "The exact product name",
|
|
"tagline": "A compelling one-sentence tagline",
|
|
"description": "2-3 sentence description of what the product does",
|
|
"features": ["Feature 1", "Feature 2", "Feature 3"],
|
|
"problemsSolved": ["Problem 1", "Problem 2", "Problem 3"],
|
|
"targetAudience": ["Audience segment 1", "Audience segment 2"],
|
|
"valuePropositions": ["Value prop 1", "Value prop 2", "Value prop 3"],
|
|
"keywords": ["keyword1", "keyword2", "keyword3"]
|
|
}
|
|
|
|
Guidelines:
|
|
- Product name should be the actual brand/product name, not generic
|
|
- Features should be specific capabilities the product offers
|
|
- Problems solved should be pain points customers have before using this
|
|
- Target audience should be specific personas
|
|
- Value propositions should explain WHY someone should care
|
|
- Keywords should be terms people search for when looking for this solution
|
|
|
|
Return ONLY the JSON object, no markdown formatting.`
|
|
|
|
const response = await openai.chat.completions.create({
|
|
model: 'gpt-4o-mini',
|
|
messages: [
|
|
{
|
|
role: 'system',
|
|
content: 'You are a product marketing expert who analyzes websites and extracts structured product information. You are thorough and accurate.'
|
|
},
|
|
{
|
|
role: 'user',
|
|
content: prompt
|
|
}
|
|
],
|
|
temperature: 0.3,
|
|
max_tokens: 1500
|
|
})
|
|
|
|
const content_text = response.choices[0].message.content || '{}'
|
|
|
|
// Extract JSON from potential markdown
|
|
const jsonMatch = content_text.match(/\{[\s\S]*\}/)
|
|
const jsonStr = jsonMatch ? jsonMatch[0] : content_text
|
|
|
|
const analysis = JSON.parse(jsonStr)
|
|
|
|
return {
|
|
productName: analysis.productName || content.title,
|
|
tagline: analysis.tagline || '',
|
|
description: analysis.description || content.metaDescription,
|
|
features: analysis.features || [],
|
|
problemsSolved: analysis.problemsSolved || [],
|
|
targetAudience: analysis.targetAudience || [],
|
|
valuePropositions: analysis.valuePropositions || [],
|
|
keywords: analysis.keywords || [],
|
|
scrapedAt: new Date().toISOString()
|
|
}
|
|
}
|
|
|
|
export async function findOpportunities(analysis: ProductAnalysis): Promise<Opportunity[]> {
|
|
// Generate dork queries based on the analysis
|
|
const keywords = analysis.keywords.slice(0, 5)
|
|
const problems = analysis.problemsSolved.slice(0, 3)
|
|
|
|
const dorkQueries = [
|
|
...keywords.map(k => `site:reddit.com "${k}" ("looking for" OR "need" OR "frustrated" OR "problem")`),
|
|
...keywords.map(k => `site:reddit.com "${k}" ("alternative to" OR "tired of" OR "sucks")`),
|
|
...keywords.map(k => `site:reddit.com "${k}" ("recommendation" OR "what do you use" OR "suggestions")`),
|
|
...problems.map(p => `site:reddit.com "${p}" ("how to" OR "help" OR "solution")`),
|
|
...keywords.map(k => `site:news.ycombinator.com "${k}" ("Ask HN" OR "Show HN")`),
|
|
...keywords.map(k => `site:indiehackers.com "${k}" ("looking for" OR "need")`),
|
|
]
|
|
|
|
const opportunities: Opportunity[] = []
|
|
|
|
// Limit to top queries to avoid rate limits
|
|
for (const query of dorkQueries.slice(0, 6)) {
|
|
try {
|
|
const results = await searchGoogle(query, 8)
|
|
for (const result of results) {
|
|
const opportunity = await analyzeOpportunity(result, analysis)
|
|
if (opportunity.relevanceScore >= 0.5) {
|
|
opportunities.push(opportunity)
|
|
}
|
|
}
|
|
} catch (e) {
|
|
console.error('Search failed:', e)
|
|
}
|
|
}
|
|
|
|
// Sort by relevance and dedupe
|
|
const seen = new Set<string>()
|
|
return opportunities
|
|
.filter(o => {
|
|
if (seen.has(o.url)) return false
|
|
seen.add(o.url)
|
|
return true
|
|
})
|
|
.sort((a, b) => b.relevanceScore - a.relevanceScore)
|
|
.slice(0, 15)
|
|
}
|
|
|
|
async function analyzeOpportunity(result: SearchResult, product: ProductAnalysis): Promise<Opportunity> {
|
|
const prompt = `Rate how relevant this forum post is for the following product.
|
|
|
|
Product: ${product.productName}
|
|
Product Description: ${product.description}
|
|
Product Features: ${product.features.join(', ')}
|
|
Problems Solved: ${product.problemsSolved.join(', ')}
|
|
|
|
Forum Post Title: ${result.title}
|
|
Forum Post Snippet: ${result.snippet}
|
|
Source: ${result.source}
|
|
|
|
Rate on a scale of 0-1 how much this post indicates someone who could benefit from the product.
|
|
Identify specific pain points mentioned.
|
|
Suggest a helpful, non-spammy way to engage.
|
|
|
|
Return JSON:
|
|
{
|
|
"relevanceScore": 0.0-1.0,
|
|
"painPoints": ["pain point 1", "pain point 2"],
|
|
"suggestedApproach": "Suggested message or approach"
|
|
}`
|
|
|
|
try {
|
|
const response = await openai.chat.completions.create({
|
|
model: 'gpt-4o-mini',
|
|
messages: [
|
|
{
|
|
role: 'system',
|
|
content: 'You are a sales researcher. Analyze forum posts for product fit. Be honest about relevance.'
|
|
},
|
|
{
|
|
role: 'user',
|
|
content: prompt
|
|
}
|
|
],
|
|
temperature: 0.3,
|
|
max_tokens: 400
|
|
})
|
|
|
|
const content = response.choices[0].message.content || '{}'
|
|
const jsonMatch = content.match(/\{[\s\S]*\}/)
|
|
const jsonStr = jsonMatch ? jsonMatch[0] : content
|
|
const analysis = JSON.parse(jsonStr)
|
|
|
|
return {
|
|
title: result.title,
|
|
url: result.url,
|
|
source: result.source,
|
|
snippet: result.snippet.slice(0, 300),
|
|
relevanceScore: analysis.relevanceScore || 0,
|
|
painPoints: analysis.painPoints || [],
|
|
suggestedApproach: analysis.suggestedApproach || ''
|
|
}
|
|
} catch (e) {
|
|
// Fallback simple analysis
|
|
const content = (result.title + ' ' + result.snippet).toLowerCase()
|
|
const overlap = product.keywords.filter(k => content.includes(k.toLowerCase())).length
|
|
const relevance = Math.min(overlap / Math.max(product.keywords.length * 0.5, 1), 1)
|
|
|
|
return {
|
|
title: result.title,
|
|
url: result.url,
|
|
source: result.source,
|
|
snippet: result.snippet.slice(0, 300),
|
|
relevanceScore: relevance,
|
|
painPoints: ['Related to product domain'],
|
|
suggestedApproach: 'Share relevant insights about their problem'
|
|
}
|
|
}
|
|
}
|
|
|
|
/** A single organic web-search hit, normalized across search providers. */
interface SearchResult {
  // Result headline as shown on the results page (HTML tags stripped).
  title: string
  // Absolute URL of the result.
  url: string
  // Short text excerpt accompanying the result; may be empty.
  snippet: string
  // Human-readable community label derived from the URL (e.g. 'Reddit').
  source: string
}
async function searchGoogle(query: string, num: number): Promise<SearchResult[]> {
|
|
// Try Serper first
|
|
if (process.env.SERPER_API_KEY) {
|
|
try {
|
|
const results = await searchSerper(query, num)
|
|
if (results.length > 0) return results
|
|
} catch (e) {
|
|
console.error('Serper search failed:', e)
|
|
}
|
|
}
|
|
|
|
// Fallback to direct fetch
|
|
return searchDirect(query, num)
|
|
}
|
|
|
|
async function searchSerper(query: string, num: number): Promise<SearchResult[]> {
|
|
const response = await fetch('https://google.serper.dev/search', {
|
|
method: 'POST',
|
|
headers: {
|
|
'X-API-KEY': process.env.SERPER_API_KEY!,
|
|
'Content-Type': 'application/json'
|
|
},
|
|
body: JSON.stringify({ q: query, num })
|
|
})
|
|
|
|
if (!response.ok) throw new Error('Serper API error')
|
|
|
|
const data = await response.json()
|
|
return (data.organic || []).map((r: any) => ({
|
|
title: r.title,
|
|
url: r.link,
|
|
snippet: r.snippet,
|
|
source: getSource(r.link)
|
|
}))
|
|
}
|
|
|
|
async function searchDirect(query: string, num: number): Promise<SearchResult[]> {
|
|
const encodedQuery = encodeURIComponent(query)
|
|
const url = `https://www.google.com/search?q=${encodedQuery}&num=${num}`
|
|
|
|
const response = await fetch(url, {
|
|
headers: {
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
|
}
|
|
})
|
|
|
|
const html = await response.text()
|
|
|
|
// Simple regex-based parsing to avoid cheerio issues
|
|
const results: SearchResult[] = []
|
|
|
|
// Extract search results using regex
|
|
const resultBlocks = html.match(/<div class="g"[^>]*>([\s\S]*?)<\/div>\s*<\/div>/g) || []
|
|
|
|
for (const block of resultBlocks.slice(0, num)) {
|
|
const titleMatch = block.match(/<h3[^>]*>(.*?)<\/h3>/)
|
|
const linkMatch = block.match(/<a href="([^"]+)"/)
|
|
const snippetMatch = block.match(/<div class="VwiC3b[^"]*"[^>]*>(.*?)<\/div>/)
|
|
|
|
if (titleMatch && linkMatch) {
|
|
const title = titleMatch[1].replace(/<[^>]+>/g, '')
|
|
const link = linkMatch[1]
|
|
const snippet = snippetMatch ? snippetMatch[1].replace(/<[^>]+>/g, '') : ''
|
|
|
|
results.push({
|
|
title,
|
|
url: link,
|
|
snippet,
|
|
source: getSource(link)
|
|
})
|
|
}
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
function getSource(url: string): string {
|
|
if (url.includes('reddit.com')) return 'Reddit'
|
|
if (url.includes('news.ycombinator.com')) return 'Hacker News'
|
|
if (url.includes('indiehackers.com')) return 'Indie Hackers'
|
|
if (url.includes('quora.com')) return 'Quora'
|
|
if (url.includes('twitter.com') || url.includes('x.com')) return 'Twitter/X'
|
|
if (url.includes('stackexchange.com') || url.includes('stackoverflow.com')) return 'Stack Exchange'
|
|
return 'Other'
|
|
}
|