diff --git a/src/__tests__/FeedReaderService.test.ts b/src/__tests__/FeedReaderService.test.ts
new file mode 100644
index 0000000..07d403c
--- /dev/null
+++ b/src/__tests__/FeedReaderService.test.ts
@@ -0,0 +1,132 @@
+import { FeedReaderService } from '../services/FeedReaderService';
+import { IFeedRepository } from '../repositories/FeedRepository';
+import { NewsSource } from '../types/Feed';
+
+// Mock dependencies
+jest.mock('../utils/logger');
+jest.mock('../services/ScrapingService');
+jest.mock('../utils/WebScraper');
+jest.mock('../extractors/ElPaisExtractor');
+jest.mock('../extractors/ElMundoExtractor');
+
+// Mock fetch globally
+global.fetch = jest.fn();
+
+const mockFeedRepository: jest.Mocked<IFeedRepository> = {
+  create: jest.fn(),
+  findAll: jest.fn(),
+  findById: jest.fn(),
+  findByUrl: jest.fn(),
+  update: jest.fn(),
+  delete: jest.fn(),
+  findBySource: jest.fn(),
+  findTodaysFrontPage: jest.fn(),
+  deleteMany: jest.fn(),
+  count: jest.fn(),
+  exists: jest.fn()
+};
+
+// Mock ScrapingService
+const mockScrapingService = {
+  processFeedBatch: jest.fn()
+};
+
+jest.mock('../services/ScrapingService', () => {
+  return {
+    ScrapingService: jest.fn().mockImplementation(() => mockScrapingService)
+  };
+});
+
+// Mock WebScraper
+const mockWebScraper = {
+  scrapeUrl: jest.fn(),
+  convertToFeedData: jest.fn()
+};
+
+jest.mock('../utils/WebScraper', () => {
+  return {
+    WebScraper: jest.fn().mockImplementation(() => mockWebScraper)
+  };
+});
+
+// Mock extractors
+const mockExtractor = {
+  extractNews: jest.fn(),
+  isEnabled: jest.fn().mockReturnValue(true),
+  getName: jest.fn(),
+  getSource: jest.fn()
+};
+
+const mockElPaisExtractor = {
+  ...mockExtractor,
+  getName: jest.fn().mockReturnValue('El País'),
+  getSource: jest.fn().mockReturnValue(NewsSource.EL_PAIS)
+};
+
+const mockElMundoExtractor = {
+  ...mockExtractor,
+  getName: jest.fn().mockReturnValue('El Mundo'),
+  getSource: jest.fn().mockReturnValue(NewsSource.EL_MUNDO)
+};
+
+jest.mock('../extractors/NewspaperExtractorFactory', () => ({
+  NewspaperExtractorFactory: {
+    getAllAvailableExtractors: jest.fn(() => [mockElPaisExtractor, mockElMundoExtractor]),
+    createExtractor: jest.fn((source) => {
+      if (source === NewsSource.EL_PAIS) return mockElPaisExtractor;
+      if (source === NewsSource.EL_MUNDO) return mockElMundoExtractor;
+      return null;
+    })
+  }
+}));
+
+describe('FeedReaderService', () => {
+  let feedReaderService: FeedReaderService;
+  const mockFetch = fetch as jest.MockedFunction<typeof fetch>;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    feedReaderService = new FeedReaderService(mockFeedRepository);
+  });
+
+  describe('Constructor and Initialization', () => {
+    it('should initialize with available extractors', () => {
+      const newspapers = feedReaderService.getAvailableNewspapers();
+      expect(newspapers).toHaveLength(2);
+      expect(newspapers.map(n => n.source)).toContain(NewsSource.EL_PAIS);
+      expect(newspapers.map(n => n.source)).toContain(NewsSource.EL_MUNDO);
+    });
+
+    it('should have all extractors enabled by default', () => {
+      const newspapers = feedReaderService.getAvailableNewspapers();
+      newspapers.forEach(newspaper => {
+        expect(newspaper.enabled).toBe(true);
+      });
+    });
+  });
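+
+  // A hedged sketch of behavioural tests for extractFromNewspaper, reusing the
+  // mock wiring above; the item shape passed through extractNews and
+  // processFeedBatch is illustrative, not the real IFeed contract.
+  describe('extractFromNewspaper', () => {
+    it('should return a failure result when no extractor exists for the source', async () => {
+      const result = await feedReaderService.extractFromNewspaper('unknown' as unknown as NewsSource);
+
+      expect(result.success).toBe(0);
+      expect(result.failed).toBe(1);
+      expect(result.errors).toHaveLength(1);
+    });
+
+    it('should process extracted items through the scraping service', async () => {
+      mockElPaisExtractor.extractNews.mockResolvedValue([{ title: 'Portada' }]);
+      mockScrapingService.processFeedBatch.mockResolvedValue([{ title: 'Portada' }]);
+
+      const result = await feedReaderService.extractFromNewspaper(NewsSource.EL_PAIS);
+
+      expect(mockScrapingService.processFeedBatch).toHaveBeenCalledTimes(1);
+      expect(result.success).toBe(1);
+      expect(result.failed).toBe(0);
+    });
+  });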
+
+});
\ No newline at end of file
diff --git a/src/services/FeedReaderService.ts b/src/services/FeedReaderService.ts
new file mode 100644
index 0000000..bd5172c
--- /dev/null
+++ b/src/services/FeedReaderService.ts
@@ -0,0 +1,205 @@
+import { ScrapingService } from './ScrapingService';
+import { IFeed, NewsSource } from '../types/Feed';
+import { IFeedRepository } from '../repositories/FeedRepository';
+import { Logger } from '../utils/logger';
+import { BaseNewspaperExtractor } from '../extractors/BaseNewspaperExtractor';
+import { NewspaperExtractorFactory } from '../extractors/NewspaperExtractorFactory';
+import { ScrapingResult } from '../types/NewspaperTypes';
+
+/**
+ * Main service for reading feeds via web scraping
+ */
+export class FeedReaderService {
+  private scrapingService: ScrapingService;
+  private extractors: Map<NewsSource, BaseNewspaperExtractor>;
+
+  constructor(feedRepository: IFeedRepository) {
+    this.scrapingService = new ScrapingService(feedRepository);
+    this.extractors = new Map();
+    this.initializeExtractors();
+  }
+
+  /**
+   * Initializes all available extractors
+   */
+  private initializeExtractors(): void {
+    const availableExtractors = NewspaperExtractorFactory.getAllAvailableExtractors();
+
+    for (const extractor of availableExtractors) {
+      this.extractors.set(extractor.getSource(), extractor);
+      Logger.info(`Initialized extractor for ${extractor.getName()}`);
+    }
+  }
+
+  /**
+   * Extracts news from a specific newspaper
+   */
+  async extractFromNewspaper(source: NewsSource): Promise<ScrapingResult> {
+    const extractor = this.extractors.get(source);
+
+    if (!extractor) {
+      const error = `No extractor found for source: ${source}`;
+      Logger.error(error);
+      return {
+        success: 0,
+        failed: 1,
+        duplicates: 0,
+        items: [],
+        errors: [error]
+      };
+    }
+
+    if (!extractor.isEnabled()) {
+      Logger.info(`Skipping disabled extractor: ${extractor.getName()}`);
+      return {
+        success: 0,
+        failed: 0,
+        duplicates: 0,
+        items: [],
+        errors: []
+      };
+    }
+
+    try {
+      Logger.info(`Starting extraction for ${extractor.getName()}`);
+      const newsItems = await extractor.extractNews();
+
+      if (newsItems.length === 0) {
+        Logger.warn(`No news items extracted for ${extractor.getName()}`);
+        return {
+          success: 0,
+          failed: 0,
+          duplicates: 0,
+          items: [],
+          errors: []
+        };
+      }
+
+      const results = await this.scrapingService.processFeedBatch(newsItems);
+      const analyzed = this.analyzeResults(results);
+
+      Logger.info(`Completed extraction for ${extractor.getName()}: ${analyzed.success} success, ${analyzed.failed} failed, ${analyzed.duplicates} duplicates`);
+      return analyzed;
+    } catch (error) {
+      const errorMsg = `Error extracting from ${extractor.getName()}: ${error}`;
+      Logger.error(errorMsg);
+      return {
+        success: 0,
+        failed: 1,
+        duplicates: 0,
+        items: [],
+        errors: [errorMsg]
+      };
+    }
+  }
+
+  /**
+   * Extracts news from all available newspapers
+   */
+  async extractFromAllNewspapers(): Promise<Map<NewsSource, ScrapingResult>> {
+    Logger.info(`Starting batch extraction from ${this.extractors.size} newspapers`);
+    const results = new Map<NewsSource, ScrapingResult>();
+
+    for (const [source, extractor] of this.extractors) {
+      if (extractor.isEnabled()) {
+        const result = await this.extractFromNewspaper(source);
+        results.set(source, result);
+      } else {
+        Logger.info(`Skipping disabled newspaper: ${extractor.getName()}`);
+      }
+    }
+
+    const totalStats = this.calculateTotalStats(results);
+    Logger.info(`Batch extraction completed: ${totalStats.success} total success, ${totalStats.failed} total failed, ${totalStats.duplicates} total duplicates`);
+
+    return results;
+  }
+
+  /**
+   * Returns the list of available newspapers
+   */
+  getAvailableNewspapers(): { source: NewsSource; name: string; enabled: boolean }[] {
+    const newspapers: { source: NewsSource; name: string; enabled: boolean }[] = [];
+
+    for (const [source, extractor] of this.extractors) {
+      newspapers.push({
+        source,
+        name: extractor.getName(),
+        enabled: extractor.isEnabled()
+      });
+    }
+
+    return newspapers;
+  }
+
+  /**
+   * Enables or disables a specific extractor
+   */
+  setExtractorEnabled(source: NewsSource, enabled: boolean): boolean {
+    const extractor = this.extractors.get(source);
+    if (!extractor) {
+      Logger.error(`Cannot set enabled state: No extractor found for source ${source}`);
+      return false;
+    }
+
+    // Note: a real implementation could persist this change in configuration;
+    // for now we only log it
+    Logger.info(`${enabled ? 'Enabled' : 'Disabled'} extractor for ${extractor.getName()}`);
+    return true;
+  }
+
+  /**
+   * Analyzes batch processing results
+   */
+  private analyzeResults(results: (IFeed | null)[]): ScrapingResult {
+    const success = results.filter(item => item !== null).length;
+    const failed = results.filter(item => item === null).length;
+
+    return {
+      success,
+      failed,
+      duplicates: 0, // ScrapingService handles duplicates internally
+      items: results,
+      errors: []
+    };
+  }
+
+  /**
+   * Computes aggregate statistics across multiple results
+   */
+  private calculateTotalStats(results: Map<NewsSource, ScrapingResult>): ScrapingResult {
+    let totalSuccess = 0;
+    let totalFailed = 0;
+    let totalDuplicates = 0;
+    const allItems: (IFeed | null)[] = [];
+    const allErrors: string[] = [];
+
+    for (const result of results.values()) {
+      totalSuccess += result.success;
+      totalFailed += result.failed;
+      totalDuplicates += result.duplicates;
+      allItems.push(...result.items);
+      allErrors.push(...result.errors);
+    }
+
+    return {
+      success: totalSuccess,
+      failed: totalFailed,
+      duplicates: totalDuplicates,
+      items: allItems,
+      errors: allErrors
+    };
+  }
+}
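+
+/*
+ * A minimal usage sketch, not part of the service itself. It assumes some
+ * concrete IFeedRepository implementation; "MongoFeedRepository" is a
+ * hypothetical name used only for illustration.
+ *
+ *   const service = new FeedReaderService(new MongoFeedRepository());
+ *   const results = await service.extractFromAllNewspapers();
+ *   for (const [source, result] of results) {
+ *     console.log(`${source}: ${result.success} stored, ${result.failed} failed`);
+ *   }
+ */
\ No newline at end of file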