import axios from 'axios';

// Base endpoint for GDELT article searches; fetchGdeltData appends its query string to this URL.
const GDELT_BASE_URL = 'https://api.gdeltproject.org/api/v2/doc/doc';

/**
 * Converts a Date into the compact UTC timestamp (YYYYMMDDHHMMSS) that is
 * passed as the `startdatetime` / `enddatetime` query parameters.
 *
 * @param {Date} date - Date to format; an invalid Date makes `toISOString` throw a RangeError.
 * @returns {string} The ISO-8601 UTC string with its separators removed, cut to 14 characters.
 */
function formatDateForGDELT(date) {
  const isoTimestamp = date.toISOString();
  // Drop every '-', ':', 'T', '.' and 'Z', leaving only the digit runs.
  const withoutSeparators = isoTimestamp.split(/[-:T.Z]/).join('');
  // Keep year through seconds; the trailing milliseconds are discarded.
  return withoutSeparators.slice(0, 14);
}

/**
 * Queries the GDELT article-list endpoint and returns the English-language
 * articles from the response.
 *
 * When the query contains a `domain:` filter, articles whose title mentions a
 * non-news keyword (puzzles, horoscopes, recipes, ...) are dropped as well.
 *
 * Request and response failures are logged and reported as an empty array
 * rather than thrown. An invalid `startDate` / `endDate` still throws
 * (RangeError from date formatting) because that happens before the request.
 *
 * @param {string} query - GDELT search expression.
 * @param {string|number|Date} startDate - Start of the search window (anything `new Date()` accepts).
 * @param {string|number|Date} endDate - End of the search window (anything `new Date()` accepts).
 * @param {number} [maxRecords=250] - Value sent as the `maxrecords` parameter.
 * @returns {Promise<Array<object>>} Filtered article objects; `[]` on error or unexpected payload.
 */
export async function fetchGdeltData(query, startDate, endDate, maxRecords = 250) {
  const formattedStartDate = formatDateForGDELT(new Date(startDate));
  const formattedEndDate = formatDateForGDELT(new Date(endDate));

  const url = `${GDELT_BASE_URL}?query=${encodeURIComponent(query)}&mode=artlist&format=json&startdatetime=${formattedStartDate}&enddatetime=${formattedEndDate}&maxrecords=${encodeURIComponent(maxRecords)}&sort=DateDesc&imagetype=photo`;

  console.log(`Fetching GDELT data from URL: ${url}`);

  try {
    const response = await axios.get(url);
    console.log('GDELT API response status:', response.status);

    // A string body means the payload was not parsed as JSON; treat it as "no results".
    if (typeof response.data === 'string') {
      console.warn('GDELT API returned a string:', response.data);
      return [];
    }

    if (!response.data || !Array.isArray(response.data.articles)) {
      console.warn('Unexpected GDELT API response format:', response.data);
      return [];
    }

    const articles = response.data.articles;
    console.log(`Found ${articles.length} articles`);

    // Filter for English articles. Null entries and non-string language values
    // are skipped instead of throwing, which would discard every article.
    const englishArticles = articles.filter(article =>
      article &&
      typeof article.language === 'string' &&
      article.language.toLowerCase() === 'english'
    );

    console.log(`Found ${englishArticles.length} English articles`);

    // The query is stringified the same way it was for the URL, so the check
    // is decided once here rather than once per article.
    const isDomainQuery = String(query).includes('domain:');

    const excludeKeywords = [
      'wordle', 'puzzle', 'horoscope', 'astrology',
      'recipe', 'gossip'
    ];

    // Queries without a domain filter are left unrestricted; domain queries
    // get the stricter title-based filtering.
    const newsArticles = isDomainQuery
      ? englishArticles.filter(article => {
          // An article without a usable title cannot match an excluded keyword,
          // so it is kept rather than aborting the whole batch with a TypeError.
          const lowercaseTitle = typeof article.title === 'string'
            ? article.title.toLowerCase()
            : '';
          return !excludeKeywords.some(keyword => lowercaseTitle.includes(keyword));
        })
      : englishArticles;

    console.log(`Found ${newsArticles.length} news articles after filtering`);

    return newsArticles;
  } catch (error) {
    // Deliberate best effort: log the failure and report "no articles".
    console.error('Error fetching GDELT data:', error);
    if (error.response) {
      console.error('Error response:', error.response.data);
    }
    return [];
  }
}

/**
 * Helper function to check if a domain is a news source.
 *
 * Accepts either a bare hostname ("edition.cnn.com") or a full URL
 * ("https://www.bbc.com/news"). The input is reduced to its lowercase
 * hostname and then compared against the list: it matches when it equals a
 * listed domain or is a subdomain of one. Plain substring matching is avoided
 * on purpose, because "microsoft.com" contains "ft.com" and "notcnn.com"
 * contains "cnn.com".
 *
 * @param {string} domain - Hostname or URL to test.
 * @returns {boolean} True when the hostname belongs to a listed news domain;
 *   false otherwise, including for non-string input.
 */
function isNewsDomain(domain) {
  if (typeof domain !== 'string') {
    return false;
  }

  const newsDomains = [
    'cnn.com', 'bbc.com', 'nytimes.com', 'washingtonpost.com', 'wsj.com',
    'reuters.com', 'apnews.com', 'npr.org', 'foxnews.com', 'nbcnews.com',
    'cbsnews.com', 'abcnews.go.com', 'politico.com', 'thehill.com',
    'bloomberg.com', 'cnbc.com', 'economist.com', 'ft.com'
    // Add more trusted news domains as needed
  ];

  const host = domain
    .trim()
    .toLowerCase()
    .replace(/^[a-z][a-z0-9+.-]*:\/\//, '') // scheme, e.g. "https://"
    .replace(/[\/?#].*$/, '')               // path, query string, fragment
    .replace(/^[^@]*@/, '')                 // userinfo, e.g. "user:pass@"
    .replace(/:\d+$/, '');                  // port

  return newsDomains.some(
    newsDomain => host === newsDomain || host.endsWith(`.${newsDomain}`)
  );
}

// Module-level keyword lists, one per news category.
// NOTE(review): nothing in the visible part of this file reads these;
// categorizeArticle builds its own, longer lists. Confirm whether other code uses them.
const POLITICS_KEYWORDS = [
  'politics',
  'government',
  'election',
  'congress',
  'senate',
  'president',
  'democrat',
  'republican',
  'washington',
  'white house',
  'supreme court'
];

const BUSINESS_TECH_KEYWORDS = [
  'business',
  'economy',
  'technology',
  'market',
  'stock',
  'trade',
  'company',
  'startup',
  'silicon valley',
  'wall street',
  'nasdaq',
  'dow jones'
];

const ENTERTAINMENT_SPORTS_KEYWORDS = [
  'entertainment',
  'sports',
  'celebrity',
  'movie',
  'music',
  'game',
  'tv',
  'show',
  'actor',
  'athlete',
  'hollywood',
  'nba',
  'nfl',
  'mlb',
  'nhl'
];

/**
 * Assigns an article to a news category by counting keyword hits in its
 * title, description and content (all compared in lowercase).
 *
 * Business and tech hits are summed into one "business-tech" score. The
 * category with the highest score wins; on a tie the earlier entry in the
 * order politics, business-tech, entertainment-sports wins. With no hits at
 * all the result is "other".
 *
 * Keywords longer than three characters match as substrings, so inflected
 * forms ("elections", "presidential") still count. Keywords of three
 * characters or fewer ("ai", "tv", "law") must appear as a whole word,
 * optionally with a plural "s". As substrings they would match inside
 * unrelated words such as "said", "campaign", "entertainment" or "Delaware".
 *
 * @param {{title?: string, description?: string, content?: string}} article -
 *   Article to classify. Missing or non-string fields are treated as empty text.
 * @returns {string} One of 'politics', 'business-tech', 'entertainment-sports' or 'other'.
 */
export function categorizeArticle(article) {
  console.log('Article title before categorization:', article.title);

  // Missing or non-string fields contribute no text instead of throwing.
  const toLowerText = (value) => (typeof value === 'string' ? value.toLowerCase() : '');
  const title = toLowerText(article.title);
  const description = toLowerText(article.description);
  const content = toLowerText(article.content);
  const fullText = `${title} ${description} ${content}`;

  const politicsKeywords = ['politics', 'government', 'election', 'congress', 'senate', 'president', 'democrat', 'republican', 'policy', 'legislation', 'vote', 'campaign', 'political', 'diplomat', 'treaty', 'bill', 'law'];
  const businessKeywords = ['business', 'economy', 'market', 'stock', 'trade', 'company', 'finance', 'investment', 'industry', 'corporate', 'commerce', 'economic'];
  const techKeywords = ['technology', 'tech', 'innovation', 'startup', 'software', 'hardware', 'internet', 'ai', 'artificial intelligence', 'cybersecurity', 'digital'];
  const entertainmentSportsKeywords = ['entertainment', 'sports', 'celebrity', 'movie', 'music', 'game', 'tv', 'show', 'actor', 'athlete', 'film', 'concert', 'performance', 'star', 'player', 'team', 'league', 'tournament'];

  // Individual words of the text, used for whole-word lookups of short keywords.
  const words = new Set(fullText.split(/[^a-z0-9]+/));
  const SHORT_KEYWORD_MAX_LENGTH = 3;

  const matchesKeyword = (keyword) =>
    keyword.length <= SHORT_KEYWORD_MAX_LENGTH
      ? words.has(keyword) || words.has(`${keyword}s`)
      : fullText.includes(keyword);

  // Each keyword counts at most once, however often it occurs.
  const checkKeywords = (keywords) => keywords.filter(matchesKeyword).length;

  const politicsScore = checkKeywords(politicsKeywords);
  const businessScore = checkKeywords(businessKeywords);
  const techScore = checkKeywords(techKeywords);
  const entertainmentSportsScore = checkKeywords(entertainmentSportsKeywords);

  const businessTechScore = businessScore + techScore;

  const scores = [
    { category: 'politics', score: politicsScore },
    { category: 'business-tech', score: businessTechScore },
    { category: 'entertainment-sports', score: entertainmentSportsScore }
  ];

  // Highest score first; the sort is stable, so ties keep the order above.
  scores.sort((a, b) => b.score - a.score);

  const selectedCategory = scores[0].score === 0 ? 'other' : scores[0].category;

  console.log('Article categorization:', {
    title: article.title,
    scores: scores,
    selectedCategory: selectedCategory
  });

  console.log('Article title after categorization:', article.title);
  return selectedCategory;
}

/**
 * Placeholder diversity score. The `article` argument is currently ignored
 * and a random value is returned on every call.
 *
 * @param {object} article - Article to score (unused for now).
 * @returns {number} A pseudo-random number in the range [0, 100).
 */
export function calculateDiversity(article) {
  // TODO: implement the actual diversity calculation logic here.
  const MAX_SCORE = 100;
  return Math.random() * MAX_SCORE;
}

// Define media bias categories: one list of source domains per bucket,
// ordered from far left to far right.
const FAR_LEFT = [
  'jacobinmag.com', 'motherjones.com',
  'thenation.com', 'alternet.org',
  'democracynow.org', 'commondreams.org',
  'truthout.org', 'rawstory.com',
  'thinkprogress.org', 'dailykos.com'
];

const MODERATE_LEFT = [
  'huffpost.com', 'msnbc.com', 'vox.com',
  'slate.com', 'thedailybeast.com', 'nytimes.com',
  'washingtonpost.com', 'cnn.com', 'politico.com',
  'theatlantic.com', 'newyorker.com', 'theguardian.com',
  'time.com', 'nbcnews.com', 'abcnews.go.com'
];

const CENTER = [
  'reuters.com', 'apnews.com',
  'bbc.com', 'bloomberg.com',
  'thehill.com', 'npr.org',
  'csmonitor.com', 'usnews.com',
  'marketwatch.com', 'economist.com'
];

const MODERATE_RIGHT = [
  'wsj.com', 'foxbusiness.com',
  'reason.com', 'forbes.com',
  'foxnews.com', 'nypost.com',
  'washingtonexaminer.com', 'dailycaller.com',
  'nationalreview.com', 'washingtontimes.com'
];

const FAR_RIGHT = [
  'breitbart.com', 'oann.com',
  'thegatewaypundit.com', 'infowars.com',
  'wnd.com', 'newsmax.com',
  'dailywire.com', 'theblaze.com',
  'townhall.com', 'americanthinker.com'
];

/**
 * Builds the list of source domains for a bias setting.
 *
 * Only the exact numbers 0, 25, 50, 75 and 100 select bias-specific sources:
 * each takes one or two full buckets plus five entries from the neighbouring
 * bucket(s). Any other value (including numeric strings) selects none. A fixed
 * set of aggregator and international domains is always appended, and the
 * selection is logged.
 *
 * @param {number} bias - Bias setting; expected to be 0, 25, 50, 75 or 100.
 * @returns {string[]} A new array of source domains.
 */
export const getSourcesForBias = (bias) => {
  // Common news aggregators and international sources, added for every setting.
  const aggregatorsAndInternational = [
    'yahoo.com', 'msn.com', 'news.google.com', 'news.bing.com',
    'aol.com', 'dw.com', 'france24.com', 'aljazeera.com'
  ];

  let biasSpecific;
  switch (bias) {
    case 0:
      biasSpecific = FAR_LEFT.concat(MODERATE_LEFT.slice(0, 5));
      break;
    case 25:
      biasSpecific = MODERATE_LEFT.concat(CENTER.slice(0, 5));
      break;
    case 50:
      biasSpecific = MODERATE_LEFT.slice(-5).concat(CENTER, MODERATE_RIGHT.slice(0, 5));
      break;
    case 75:
      biasSpecific = CENTER.slice(-5).concat(MODERATE_RIGHT, FAR_RIGHT.slice(0, 5));
      break;
    case 100:
      biasSpecific = MODERATE_RIGHT.slice(-5).concat(FAR_RIGHT);
      break;
    default:
      // Unrecognised settings contribute no bias-specific sources.
      biasSpecific = [];
  }

  const sources = biasSpecific.concat(aggregatorsAndInternational);

  console.log(`Bias: ${bias}, Selected sources:`, sources);
  return sources;
};
