News Extraction

Extract clean, structured news content using WebLinq API workflows and JSON schemas.

Comprehensive News Analysis

Extract structured news data with precise JSON schemas and flexible text analysis:

/**
 * Extract structured metadata from a news article through the WebLinq
 * `extract-json` endpoint, constrained by a JSON schema.
 *
 * The API key is read from `process.env.WEBLINQ_API_KEY`.
 *
 * @param {string} articleUrl - URL of the article to analyze.
 * @returns {Promise<object>} The `data.extracted` payload of the API response.
 * @throws {Error} If the response body is not JSON (message carries the HTTP
 *   status) or the API does not report `success`.
 */
async function extractNewsArticle(articleUrl) {
  const apiKey = process.env.WEBLINQ_API_KEY;

  // Define comprehensive news article schema
  const newsSchema = {
    type: "object",
    properties: {
      headline: { type: "string" },
      subheadline: { type: "string" },
      author: {
        type: "object",
        properties: {
          name: { type: "string" },
          email: { type: "string" },
          twitter: { type: "string" }
        }
      },
      publishDate: { type: "string", format: "date-time" },
      lastModified: { type: "string", format: "date-time" },
      category: { type: "string" },
      tags: { type: "array", items: { type: "string" } },
      summary: { type: "string", maxLength: 300 },
      keyPoints: { type: "array", items: { type: "string" } },
      quotes: {
        type: "array",
        items: {
          type: "object",
          properties: {
            text: { type: "string" },
            speaker: { type: "string" },
            role: { type: "string" }
          }
        }
      },
      readingTime: { type: "number" },
      wordCount: { type: "number" },
      credibilityScore: { type: "number", minimum: 1, maximum: 10 },
      sources: { type: "array", items: { type: "string" } }
    },
    required: ["headline", "publishDate", "summary", "keyPoints"]
  };

  const response = await fetch('https://api.weblinq.dev/v1/web/extract-json', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      url: articleUrl,
      response_format: {
        type: "json_schema",
        json_schema: newsSchema
      },
      prompt: "Extract comprehensive news article information including metadata, key points, and quotes"
    })
  });

  // An upstream failure can return a non-JSON body; report the HTTP status
  // instead of leaking a bare SyntaxError from the JSON parser.
  let data;
  try {
    data = await response.json();
  } catch (cause) {
    throw new Error(`Failed to extract news data (HTTP ${response.status})`, { cause });
  }

  if (!data?.success) {
    throw new Error(data?.error?.message || 'Failed to extract news data');
  }

  return data.data.extracted;
}

Editorial Analysis (Text) — JavaScript
/**
 * Request a free-text editorial / media-literacy analysis of a news article
 * from the WebLinq `extract-json` endpoint (`response_format.type: "text"`).
 *
 * The API key is read from `process.env.WEBLINQ_API_KEY`.
 *
 * @param {string} articleUrl - URL of the article to analyze.
 * @returns {Promise<*>} The `data.extracted` value of the API response.
 * @throws {Error} If the response body is not JSON (message carries the HTTP
 *   status) or the API does not report `success`.
 */
async function analyzeNewsEditorial(articleUrl) {
  const apiKey = process.env.WEBLINQ_API_KEY;

  // Use text response for nuanced editorial analysis
  const response = await fetch('https://api.weblinq.dev/v1/web/extract-json', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      url: articleUrl,
      prompt: `Provide an in-depth editorial analysis of this news article:

      1. **Bias Assessment**: Identify any potential bias in language, source selection, or framing
      2. **Fact vs Opinion**: Distinguish between factual reporting and editorial opinion
      3. **Completeness**: What important context or perspectives might be missing?
      4. **Impact Analysis**: How might this story affect different stakeholders?
      5. **Credibility Factors**: What makes this reporting trustworthy or questionable?
      6. **Follow-up Questions**: What questions should readers ask or investigate further?

      Write this as a media literacy analysis that helps readers think critically about the information presented.`,
      response_format: { type: "text" }
    })
  });

  // An upstream failure can return a non-JSON body; report the HTTP status
  // instead of leaking a bare SyntaxError from the JSON parser.
  let data;
  try {
    data = await response.json();
  } catch (cause) {
    throw new Error(`Failed to analyze article (HTTP ${response.status})`, { cause });
  }

  if (!data?.success) {
    throw new Error(data?.error?.message || 'Failed to analyze article');
  }

  return data.data.extracted;
}

News Summary (Text) — JavaScript
/**
 * Generate a free-text summary of a news article, phrased for a given audience,
 * through the WebLinq `extract-json` endpoint (`response_format.type: "text"`).
 *
 * The API key is read from `process.env.WEBLINQ_API_KEY`.
 *
 * @param {string} articleUrl - URL of the article to summarize.
 * @param {string} [audienceType='general'] - One of `general`, `technical`,
 *   `executive`, `student`. Any other value falls back to `general`.
 * @returns {Promise<*>} The `data.extracted` value of the API response.
 * @throws {Error} If the response body is not JSON (message carries the HTTP
 *   status) or the API does not report `success`.
 */
async function generateNewsSummary(articleUrl, audienceType = 'general') {
  const apiKey = process.env.WEBLINQ_API_KEY;

  const audiencePrompts = {
    general: "Write a clear, accessible summary for the general public",
    technical: "Write a detailed summary for industry professionals and experts",
    executive: "Write a concise executive briefing focusing on business implications",
    student: "Write an educational summary with background context for students"
  };

  // Own-property check so unknown audiences (and inherited keys such as
  // "constructor") fall back to the general prompt instead of sending
  // "undefined." to the API.
  const audiencePrompt = Object.prototype.hasOwnProperty.call(audiencePrompts, audienceType)
    ? audiencePrompts[audienceType]
    : audiencePrompts.general;

  const response = await fetch('https://api.weblinq.dev/v1/web/extract-json', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      url: articleUrl,
      prompt: `${audiencePrompt}.

      Include:
      - Main story in 2-3 sentences
      - Why this matters now
      - Key people/organizations involved
      - Potential implications or next steps
      - Background context if needed

      Keep it engaging and informative.`,
      response_format: { type: "text" }
    })
  });

  // An upstream failure can return a non-JSON body; report the HTTP status
  // instead of leaking a bare SyntaxError from the JSON parser.
  let data;
  try {
    data = await response.json();
  } catch (cause) {
    throw new Error(`Failed to generate summary (HTTP ${response.status})`, { cause });
  }

  if (!data?.success) {
    throw new Error(data?.error?.message || 'Failed to generate summary');
  }

  return data.data.extracted;
}

Python Dual Approach
import requests

def comprehensive_news_analysis(article_url):
    """Fetch structured metadata and a free-text analysis for one news article.

    Sends two POST requests to the WebLinq ``extract-json`` endpoint: one
    constrained by a JSON schema, one with ``response_format`` set to text.

    Args:
        article_url: URL of the article to analyze.

    Returns:
        dict with two keys: ``metadata`` (the ``data.extracted`` value of the
        schema request) and ``analysis`` (the ``data.extracted`` value of the
        text request).

    Raises:
        Exception: If either response does not report ``success``. The message
            is the API's ``error.message`` when present.
    """
    api_key = "your_api_key_here"
    endpoint = 'https://api.weblinq.dev/v1/web/extract-json'
    headers = {'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'}

    def post_extract(payload, failure_message):
        # Check `success` before indexing into the payload so an API failure
        # raises a readable message instead of a KeyError/TypeError.
        data = requests.post(endpoint, headers=headers, json=payload).json()
        if not data.get('success'):
            error = data.get('error') or {}
            raise Exception(error.get('message', failure_message))
        return data['data']['extracted']

    # Structured data extraction
    news_schema = {
        "type": "object",
        "properties": {
            "headline": {"type": "string"},
            "author": {"type": "string"},
            "publishDate": {"type": "string"},
            "category": {"type": "string"},
            "summary": {"type": "string", "maxLength": 300},
            "keyPoints": {"type": "array", "items": {"type": "string"}},
            "quotes": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "text": {"type": "string"},
                        "speaker": {"type": "string"}
                    }
                }
            },
            "readingTime": {"type": "number"},
            "credibilityScore": {"type": "number", "minimum": 1, "maximum": 10}
        },
        "required": ["headline", "publishDate", "summary"]
    }

    # Get structured metadata
    metadata = post_extract(
        {
            'url': article_url,
            'response_format': {'type': 'json_schema', 'json_schema': news_schema},
            'prompt': 'Extract comprehensive news article information'
        },
        'Failed to extract',
    )

    # Get contextual analysis
    analysis = post_extract(
        {
            'url': article_url,
            'prompt': '''Analyze this news story for:
            - Historical context and background
            - Potential long-term implications
            - Different stakeholder perspectives
            - Related stories or trends
            - Questions this raises for further investigation

            Provide thoughtful analysis that goes beyond just summarizing the facts.''',
            'response_format': {'type': 'text'}
        },
        'Failed to analyze',
    )

    return {
        'metadata': metadata,
        'analysis': analysis
    }

News Monitoring Workflow

Automated news monitoring combining search, extraction, and archival:

/**
 * Search for recent news on each topic and, for every search hit, collect
 * clean markdown, schema-constrained structured data and a screenshot.
 *
 * The API key is read from `process.env.WEBLINQ_API_KEY`. A failed search is
 * logged and that topic is skipped; an error while processing one article is
 * logged and that article is skipped. Only articles whose structured
 * extraction reports `success` are kept.
 *
 * @param {string[]} topics - Topics to search for.
 * @param {number} [maxArticlesPerTopic=5] - `limit` passed to the search endpoint.
 * @returns {Promise<{topics: string[], totalArticles: number, articles: object[],
 *   byTopic: Object<string, object[]>, bySentiment: {positive: object[],
 *   negative: object[], neutral: object[]}}>}
 */
async function monitorNewsTopics(topics, maxArticlesPerTopic = 5) {
  const apiKey = process.env.WEBLINQ_API_KEY;
  const baseUrl = 'https://api.weblinq.dev/v1';
  const headers = {
    'Authorization': `Bearer ${apiKey}`,
    'Content-Type': 'application/json'
  };

  // POST a JSON payload to a WebLinq endpoint and return the parsed body.
  const postJson = async (path, payload) => {
    const response = await fetch(`${baseUrl}${path}`, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload)
    });
    return response.json();
  };

  // Loop-invariant: the same schema is used for every article.
  const newsSchema = {
    type: "object",
    properties: {
      headline: { type: "string" },
      author: { type: "string" },
      publishDate: { type: "string" },
      category: { type: "string" },
      sentiment: {
        type: "string",
        enum: ["positive", "negative", "neutral"]
      },
      keyPoints: { type: "array", items: { type: "string" } },
      entities: {
        type: "object",
        properties: {
          people: { type: "array", items: { type: "string" } },
          organizations: { type: "array", items: { type: "string" } },
          locations: { type: "array", items: { type: "string" } }
        }
      },
      credibilityScore: { type: "number", minimum: 1, maximum: 10 }
    }
  };

  const allArticles = [];

  for (const topic of topics) {
    // Step 1: Search for recent news on the topic
    const searchData = await postJson('/web/search', {
      query: `${topic} news site:cnn.com OR site:bbc.com OR site:reuters.com OR site:apnews.com`,
      limit: maxArticlesPerTopic
    });

    if (!searchData.success) {
      console.error(`Search failed for topic: ${topic}`);
      continue;
    }

    // Step 2: Extract clean content from each article.
    // A successful search without a `results` array is treated as "no hits".
    for (const result of searchData.data?.results ?? []) {
      try {
        // The three requests are independent of each other, so issue them
        // together instead of waiting for each one in turn.
        const [markdown, extracted, screenshot] = await Promise.all([
          // Get clean markdown content
          postJson('/web/markdown', { url: result.url }),
          // Extract structured data
          postJson('/web/extract-json', {
            url: result.url,
            response_format: {
              type: "json_schema",
              json_schema: newsSchema
            },
            prompt: `Analyze this news article about "${topic}" and extract key information`
          }),
          // Take screenshot for visual record
          postJson('/web/screenshot', {
            url: result.url,
            screenshotOptions: { fullPage: false, type: 'png' }
          })
        ]);

        if (extracted.success) {
          allArticles.push({
            topic,
            url: result.url,
            source: new URL(result.url).hostname,
            searchTitle: result.title,
            content: markdown.success ? markdown.data.markdown : null,
            structured: extracted.data.extracted,
            screenshot: screenshot.success ? screenshot.data.permanentUrl : null,
            extractedAt: new Date().toISOString()
          });
        }
      } catch (error) {
        console.error(`Error processing ${result.url}:`, error);
      }

      // Rate limiting
      await new Promise(resolve => setTimeout(resolve, 1500));
    }
  }

  const withSentiment = (sentiment) =>
    allArticles.filter(article => article.structured?.sentiment === sentiment);

  return {
    topics,
    totalArticles: allArticles.length,
    articles: allArticles,
    byTopic: Object.fromEntries(
      topics.map(topic => [topic, allArticles.filter(article => article.topic === topic)])
    ),
    bySentiment: {
      positive: withSentiment('positive'),
      negative: withSentiment('negative'),
      neutral: withSentiment('neutral')
    }
  };
}

Python News Extraction
import requests

def extract_news_article(article_url):
    """Extract structured fields from a news article via WebLinq ``extract-json``.

    Args:
        article_url: URL of the article to process.

    Returns:
        The ``data.extracted`` value of the JSON response.

    Raises:
        Exception: When the response's ``success`` field is falsy. The message
            is ``error.message`` from the response if present, otherwise
            ``'Failed to extract'``.
    """
    api_key = "your_api_key_here"

    # Shape of a single entry in the "quotes" array.
    quote_item = {
        "type": "object",
        "properties": {
            "text": {"type": "string"},
            "speaker": {"type": "string"}
        }
    }

    schema_properties = {
        "headline": {"type": "string"},
        "author": {"type": "string"},
        "publishDate": {"type": "string"},
        "category": {"type": "string"},
        "summary": {"type": "string", "maxLength": 300},
        "keyPoints": {"type": "array", "items": {"type": "string"}},
        "quotes": {"type": "array", "items": quote_item},
        "readingTime": {"type": "number"},
        "credibilityScore": {"type": "number", "minimum": 1, "maximum": 10}
    }

    news_schema = {
        "type": "object",
        "properties": schema_properties,
        "required": ["headline", "publishDate", "summary"]
    }

    request_headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }
    payload = {
        'url': article_url,
        'response_format': {
            'type': 'json_schema',
            'json_schema': news_schema
        },
        'prompt': 'Extract comprehensive news article information'
    }

    response = requests.post(
        'https://api.weblinq.dev/v1/web/extract-json',
        headers=request_headers,
        json=payload
    )

    data = response.json()
    if data['success']:
        return data['data']['extracted']

    # Failure path: surface the API's own message when it provides one.
    failure = data.get('error', {})
    raise Exception(failure.get('message', 'Failed to extract'))

Breaking News Alerts

Real-time news monitoring with instant alerts:

/**
 * Poll the WebLinq search endpoint for breaking news about the given keywords
 * and print an alert for every hit whose extracted urgency is not "low".
 *
 * Runs one check immediately, then schedules further checks with
 * `setInterval`. The API key is read from `process.env.WEBLINQ_API_KEY`.
 *
 * Each URL is alerted at most once per call of this function: the `after:`
 * search filter only has day granularity, so the same articles come back on
 * every poll. Hits without an extracted `urgency` are skipped.
 *
 * @param {string[]} keywords - Keywords to watch.
 * @param {number} [checkInterval=300000] - Milliseconds between checks.
 * @returns {Promise<*>} The handle returned by `setInterval`; pass it to
 *   `clearInterval` to stop monitoring.
 */
async function monitorBreakingNews(keywords, checkInterval = 300000) { // 5 minutes
  const apiKey = process.env.WEBLINQ_API_KEY;
  const baseUrl = 'https://api.weblinq.dev/v1';
  const headers = {
    'Authorization': `Bearer ${apiKey}`,
    'Content-Type': 'application/json'
  };

  let lastCheckTime = new Date();

  // URLs that already produced an alert. Grows for the lifetime of the monitor.
  const alertedUrls = new Set();

  // True while a check is running, so a slow check is not overlapped by the
  // next interval tick.
  let isChecking = false;

  const alertSchema = {
    type: "object",
    properties: {
      urgency: {
        type: "string",
        enum: ["low", "medium", "high", "critical"]
      },
      headline: { type: "string" },
      summary: { type: "string", maxLength: 200 },
      impact: {
        type: "object",
        properties: {
          geographic: { type: "string" },
          sectors: { type: "array", items: { type: "string" } },
          stakeholders: { type: "array", items: { type: "string" } }
        }
      },
      timeline: {
        type: "object",
        properties: {
          when: { type: "string" },
          expectedDuration: { type: "string" }
        }
      },
      actionRequired: { type: "boolean" }
    }
  };

  async function checkForBreakingNews() {
    if (isChecking) return;
    isChecking = true;

    const searchQuery = keywords.map(keyword =>
      `"${keyword}" breaking news OR "${keyword}" urgent OR "${keyword}" alert`
    ).join(' OR ');

    try {
      const searchResponse = await fetch(`${baseUrl}/web/search`, {
        method: 'POST',
        headers,
        body: JSON.stringify({
          query: searchQuery + ` after:${lastCheckTime.toISOString().split('T')[0]}`,
          limit: 10
        })
      });

      const searchData = await searchResponse.json();

      if (!searchData.success) return;

      for (const result of searchData.data?.results ?? []) {
        // Already alerted on this URL in an earlier check.
        if (alertedUrls.has(result.url)) continue;

        // Extract alert information
        const alertResponse = await fetch(`${baseUrl}/web/extract-json`, {
          method: 'POST',
          headers,
          body: JSON.stringify({
            url: result.url,
            response_format: {
              type: "json_schema",
              json_schema: alertSchema
            },
            prompt: "Analyze if this is breaking news and extract urgency, impact, and action requirements"
          })
        });

        const alertData = await alertResponse.json();
        const extracted = alertData.success ? alertData.data?.extracted : null;

        // `urgency` is not required by the schema. Without this guard a hit
        // lacking it would throw inside sendAlert and abort the whole batch.
        if (extracted?.urgency && extracted.urgency !== 'low') {
          alertedUrls.add(result.url);
          await sendAlert({
            ...extracted,
            url: result.url,
            source: new URL(result.url).hostname,
            detectedAt: new Date().toISOString()
          });
        }

        await new Promise(resolve => setTimeout(resolve, 1000));
      }

      lastCheckTime = new Date();
    } catch (error) {
      console.error('Error checking breaking news:', error);
    } finally {
      isChecking = false;
    }
  }

  async function sendAlert(alert) {
    console.log(`🚨 BREAKING NEWS ALERT - ${alert.urgency.toUpperCase()}`);
    console.log(`Headline: ${alert.headline}`);
    console.log(`Summary: ${alert.summary}`);
    console.log(`Source: ${alert.source}`);
    console.log(`URL: ${alert.url}`);

    if (alert.actionRequired) {
      console.log('⚠️ ACTION REQUIRED');
    }

    // Here you would integrate with your notification system:
    // - Send email alerts
    // - Push notifications
    // - Slack/Discord webhooks
    // - SMS alerts for critical news
  }

  // Start monitoring
  console.log(`Starting breaking news monitoring for: ${keywords.join(', ')}`);

  // Initial check
  await checkForBreakingNews();

  // Set up interval; the handle is returned so callers can stop the monitor.
  return setInterval(checkForBreakingNews, checkInterval);
}

// Usage: watch three example keywords, checking every 300000 ms (5 minutes).
const watchedKeywords = ['earthquake', 'market crash', 'cyber attack'];
const pollEveryMs = 300000;
await monitorBreakingNews(watchedKeywords, pollEveryMs);

News Aggregation Report

Generate comprehensive news reports with PDF summaries:

/**
 * Build a news report for the given topics: search each topic, summarize up to
 * five hits per topic with a JSON schema, render an HTML report and ask the
 * WebLinq PDF endpoint to convert it.
 *
 * The API key is read from `process.env.WEBLINQ_API_KEY`. A failed search
 * skips that topic. An error on one article is logged and that article is
 * skipped. An error on the PDF request is logged and yields `pdfUrl: null`.
 *
 * Relies on `generateTopicSummary`, `generateReportHTML`,
 * `calculateSentimentBreakdown` and `calculateTrendAnalysis` being in scope.
 * NOTE(review): only `generateReportHTML` is defined in this document —
 * confirm the other three are supplied by the caller's module.
 *
 * @param {string[]} topics - Topics to report on.
 * @param {string} [timeframe='24h'] - Passed through to `generateReportHTML`;
 *   this function does not use it to restrict the search.
 * @returns {Promise<{topics: string[], reportData: object[], pdfUrl: (string|null),
 *   generatedAt: string, stats: object}>}
 */
async function generateNewsReport(topics, timeframe = '24h') {
  const apiKey = process.env.WEBLINQ_API_KEY;
  const baseUrl = 'https://api.weblinq.dev/v1';
  const headers = {
    'Authorization': `Bearer ${apiKey}`,
    'Content-Type': 'application/json'
  };

  // POST a JSON payload to a WebLinq endpoint and return the parsed body.
  const postJson = async (path, payload) => {
    const response = await fetch(`${baseUrl}${path}`, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload)
    });
    return response.json();
  };

  // Loop-invariant: the same schema is used for every article.
  const summarySchema = {
    type: "object",
    properties: {
      headline: { type: "string" },
      keyPoints: { type: "array", items: { type: "string" } },
      impact: { type: "string" },
      trend: { type: "string", enum: ["rising", "stable", "declining"] },
      sentiment: { type: "string", enum: ["positive", "negative", "neutral"] }
    }
  };

  const reportData = [];

  for (const topic of topics) {
    // Search for news on this topic
    const searchData = await postJson('/web/search', {
      query: `${topic} news`,
      limit: 8
    });

    if (!searchData.success) continue;

    // Extract key information from top articles
    const topicArticles = [];
    const topResults = (searchData.data?.results ?? []).slice(0, 5);

    for (const result of topResults) {
      try {
        const extractData = await postJson('/web/extract-json', {
          url: result.url,
          response_format: {
            type: "json_schema",
            json_schema: summarySchema
          },
          prompt: `Summarize this news article about "${topic}"`
        });

        if (extractData.success) {
          topicArticles.push({
            url: result.url,
            source: new URL(result.url).hostname,
            ...extractData.data.extracted
          });
        }
      } catch (error) {
        // One unreachable or malformed article must not sink the whole report.
        console.error(`Error processing ${result.url}:`, error);
      }

      await new Promise(resolve => setTimeout(resolve, 1000));
    }

    reportData.push({
      topic,
      articles: topicArticles,
      summary: generateTopicSummary(topicArticles)
    });
  }

  // Create HTML report
  const reportHtml = generateReportHTML(reportData, timeframe);

  // Convert to PDF
  const dataUri = 'data:text/html;base64,' + Buffer.from(reportHtml).toString('base64');

  let pdfUrl = null;
  try {
    const pdf = await postJson('/web/pdf', { url: dataUri });
    pdfUrl = pdf.success ? pdf.data.permanentUrl : null;
  } catch (error) {
    // The collected data is still useful without the PDF rendering.
    console.error('Error generating PDF report:', error);
  }

  return {
    topics,
    reportData,
    pdfUrl,
    generatedAt: new Date().toISOString(),
    stats: {
      totalArticles: reportData.reduce((sum, topic) => sum + topic.articles.length, 0),
      sentimentBreakdown: calculateSentimentBreakdown(reportData),
      trendAnalysis: calculateTrendAnalysis(reportData)
    }
  };
}

/**
 * Render the collected report data as a standalone HTML document.
 *
 * All interpolated values are HTML-escaped, because headlines, key points and
 * impact text are extracted from third-party web pages. Missing `keyPoints`,
 * `summary.themes` or other fields render as empty rather than throwing.
 *
 * @param {Array<{topic: string, articles: object[], summary: {trend?: string,
 *   themes?: string[]}}>} reportData - One entry per topic.
 * @param {string} timeframe - Label shown in the report heading.
 * @returns {string} The HTML document.
 */
function generateReportHTML(reportData, timeframe) {
  const HTML_ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) =>
    String(value ?? '').replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);

  // Only these values have a matching CSS rule; anything else must not reach
  // the class attribute.
  const SENTIMENT_CLASSES = ['positive', 'negative', 'neutral'];

  const renderArticle = (article) => {
    const sentimentClass = SENTIMENT_CLASSES.includes(article.sentiment)
      ? ` ${article.sentiment}`
      : '';
    const keyPoints = (article.keyPoints ?? [])
      .map(point => `<li>${escapeHtml(point)}</li>`)
      .join('');

    return `
            <div class="article${sentimentClass}">
              <h3>${escapeHtml(article.headline)}</h3>
              <p class="source">Source: ${escapeHtml(article.source)}</p>
              <div class="key-points">
                <strong>Key Points:</strong>
                <ul>
                  ${keyPoints}
                </ul>
              </div>
              <p><strong>Impact:</strong> ${escapeHtml(article.impact)}</p>
            </div>
          `;
  };

  const renderTopic = (topic) => {
    const articles = topic.articles ?? [];
    const themes = (topic.summary?.themes ?? []).map(escapeHtml).join(', ');

    return `
          <h2>${escapeHtml(topic.topic)}</h2>
          <div class="topic-summary">
            <p><strong>Articles Analyzed:</strong> ${articles.length}</p>
            <p><strong>Overall Trend:</strong> ${escapeHtml(topic.summary?.trend)}</p>
            <p><strong>Key Themes:</strong> ${themes}</p>
          </div>

          ${articles.map(renderArticle).join('')}
        `;
  };

  return `
    <html>
      <head>
        <title>News Report - ${escapeHtml(new Date().toLocaleDateString())}</title>
        <style>
          body { font-family: Arial, sans-serif; margin: 40px; line-height: 1.6; }
          h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; }
          h2 { color: #34495e; margin-top: 30px; }
          .article { margin: 20px 0; padding: 15px; border-left: 4px solid #3498db; background: #f8f9fa; }
          .positive { border-left-color: #27ae60; }
          .negative { border-left-color: #e74c3c; }
          .neutral { border-left-color: #95a5a6; }
          .source { font-size: 0.9em; color: #7f8c8d; }
          .key-points { margin-top: 10px; }
          .key-points li { margin: 5px 0; }
        </style>
      </head>
      <body>
        <h1>News Report - ${escapeHtml(timeframe)}</h1>
        <p><strong>Generated:</strong> ${escapeHtml(new Date().toLocaleString())}</p>

        ${reportData.map(renderTopic).join('')}
      </body>
    </html>

`;
}

Python News Extraction
import requests

def extract_news_article(article_url):
    """Extract structured fields from a news article via WebLinq ``extract-json``.

    Args:
        article_url: URL of the article to process.

    Returns:
        The ``data.extracted`` value of the JSON response.

    Raises:
        Exception: When the response's ``success`` field is falsy. The message
            is ``error.message`` from the response if present, otherwise
            ``'Failed to extract'``.
    """
    api_key = "your_api_key_here"

    # Shape of a single entry in the "quotes" array.
    quote_item = {
        "type": "object",
        "properties": {
            "text": {"type": "string"},
            "speaker": {"type": "string"}
        }
    }

    schema_properties = {
        "headline": {"type": "string"},
        "author": {"type": "string"},
        "publishDate": {"type": "string"},
        "category": {"type": "string"},
        "summary": {"type": "string", "maxLength": 300},
        "keyPoints": {"type": "array", "items": {"type": "string"}},
        "quotes": {"type": "array", "items": quote_item},
        "readingTime": {"type": "number"},
        "credibilityScore": {"type": "number", "minimum": 1, "maximum": 10}
    }

    news_schema = {
        "type": "object",
        "properties": schema_properties,
        "required": ["headline", "publishDate", "summary"]
    }

    request_headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }
    payload = {
        'url': article_url,
        'response_format': {
            'type': 'json_schema',
            'json_schema': news_schema
        },
        'prompt': 'Extract comprehensive news article information'
    }

    response = requests.post(
        'https://api.weblinq.dev/v1/web/extract-json',
        headers=request_headers,
        json=payload
    )

    data = response.json()
    if data['success']:
        return data['data']['extracted']

    # Failure path: surface the API's own message when it provides one.
    failure = data.get('error', {})
    raise Exception(failure.get('message', 'Failed to extract'))

Real-time Source Monitoring

Monitor specific news sources for new content:

/**
 * Scan a list of news homepages: collect their internal article links and ask
 * the WebLinq `extract-json` endpoint for top stories, breaking news and
 * trending topics.
 *
 * The API key is read from `process.env.WEBLINQ_API_KEY`. A source whose link
 * extraction does not report `success`, or whose processing throws, is left
 * out of `results` (errors are logged). A 2-second pause follows every
 * source, including skipped and failed ones.
 *
 * @param {Array<{name: string, url: string}>} sources - Homepages to scan.
 * @param {string[]} [categories=[]] - Accepted for interface compatibility;
 *   not read by this function.
 * @returns {Promise<{sources: string[], results: object[], summary:
 *   {totalArticles: number, breakingNewsCount: number, activeSources: number}}>}
 */
async function monitorNewsSources(sources, categories = []) {
  const apiKey = process.env.WEBLINQ_API_KEY;
  const baseUrl = 'https://api.weblinq.dev/v1';
  const headers = {
    'Authorization': `Bearer ${apiKey}`,
    'Content-Type': 'application/json'
  };

  // Loop-invariant patterns for URLs that look like articles.
  const articlePathPattern = /\/(article|story|news|post)\//;
  const datePathPattern = /\/\d{4}\/\d{2}\/\d{2}\//; // Date-based URLs

  // Loop-invariant: the same schema is used for every homepage.
  const sourceSchema = {
    type: "object",
    properties: {
      topStories: {
        type: "array",
        items: {
          type: "object",
          properties: {
            headline: { type: "string" },
            category: { type: "string" },
            publishTime: { type: "string" },
            urgency: { type: "string", enum: ["low", "medium", "high"] }
          }
        }
      },
      breakingNews: { type: "array", items: { type: "string" } },
      trendingTopics: { type: "array", items: { type: "string" } }
    }
  };

  const results = [];

  for (const source of sources) {
    try {
      // Extract all news links from the homepage
      const linksResponse = await fetch(`${baseUrl}/web/links`, {
        method: 'POST',
        headers,
        body: JSON.stringify({
          url: source.url,
          includeExternal: false
        })
      });

      const linksData = await linksResponse.json();

      if (!linksData.success) continue;

      // Filter for article links. Entries without a string `url` are ignored
      // rather than throwing and discarding the whole source.
      const articleLinks = (linksData.data?.links ?? []).filter(link =>
        typeof link?.url === 'string' &&
        (articlePathPattern.test(link.url) || datePathPattern.test(link.url))
      );

      // Extract headlines and metadata
      const extractResponse = await fetch(`${baseUrl}/web/extract-json`, {
        method: 'POST',
        headers,
        body: JSON.stringify({
          url: source.url,
          response_format: {
            type: "json_schema",
            json_schema: sourceSchema
          },
          prompt: "Extract current top stories, breaking news, and trending topics from this news homepage"
        })
      });

      const extractData = await extractResponse.json();

      results.push({
        source: source.name,
        url: source.url,
        articles: articleLinks.slice(0, 15),
        metadata: extractData.success ? extractData.data.extracted : null,
        scannedAt: new Date().toISOString()
      });
    } catch (error) {
      console.error(`Error monitoring ${source.name}:`, error);
    } finally {
      // In `finally` so the pause also applies when `continue` skips a source.
      await new Promise(resolve => setTimeout(resolve, 2000));
    }
  }

  return {
    sources: sources.map(s => s.name),
    results,
    summary: {
      totalArticles: results.reduce((sum, r) => sum + r.articles.length, 0),
      breakingNewsCount: results.reduce((sum, r) =>
        sum + (r.metadata?.breakingNews?.length || 0), 0
      ),
      activeSources: results.filter(r => r.articles.length > 0).length
    }
  };
}

// Usage with major news sources: each [name, url] pair becomes a { name, url }
// source descriptor, and the whole list is scanned in one call.
const newsSources = [
  ['CNN', 'https://cnn.com'],
  ['BBC', 'https://bbc.com/news'],
  ['Reuters', 'https://reuters.com'],
  ['AP News', 'https://apnews.com'],
].map(([name, url]) => ({ name, url }));

const sourceUpdate = await monitorNewsSources(newsSources);

API Coverage for News

🔍 Search API

📄 Markdown API

🤖 Extract JSON API

🔗 Links API

📸 Screenshot API

📋 PDF API

News Monitoring Pro Tip: Combine search for discovery, extract-json for structured analysis, markdown for clean content, and PDF for professional reports. This creates a complete news intelligence pipeline.

📊 JSON Schema for News

📝 Text Response for News

Introduction

Guides

Comprehensive News Analysis

News Monitoring Workflow

Breaking News Alerts

News Aggregation Report

Real-time Source Monitoring

API Coverage for News

Introduction

Guides

Comprehensive News Analysis

News Monitoring Workflow

Breaking News Alerts

News Aggregation Report

Real-time Source Monitoring

API Coverage for News

Comprehensive News Analysis

News Monitoring Workflow

Breaking News Alerts

News Aggregation Report

Real-time Source Monitoring

API Coverage for News