Add the ability to launch the scraper from the package.json "scripts" section

This commit is contained in:
albert
2025-07-29 12:43:48 +02:00
parent e0cb439234
commit 36f6de4edd
2 changed files with 63 additions and 0 deletions

View File

@ -18,6 +18,8 @@
"build": "tsc",
"start": "node dist/server.js",
"dev": "tsx watch src/server.ts",
"scraper": "node dist/scraper.js",
"scraper:dev": "tsx watch src/scraper.ts",
"test": "jest",
"test:watch": "jest --watch",
"lint": "eslint src/**/*.ts",

61
src/scraper.ts Normal file
View File

@@ -0,0 +1,61 @@
import { ScrapingScheduler } from './services/ScrapingScheduler.js';
import { FeedRepository } from './repositories/FeedRepository.js';
import { DatabaseConnection } from './config/database.js';
import { Logger } from './utils/logger.js';
// Module-level handle so the signal-driven shutdown hook can stop the
// scheduler started by initializeScraper(). Unassigned until init succeeds.
let scheduler: ScrapingScheduler;
async function initializeScraper() {
try {
// Connect to database
await DatabaseConnection.getInstance().connect();
Logger.database.connected();
// Initialize repository and scheduler
const feedRepository = new FeedRepository();
scheduler = new ScrapingScheduler(feedRepository, {
intervalMinutes: 30, // Run every 30 minutes
maxRetries: 2,
retryDelayMinutes: 5,
enabled: true
});
// Start the scheduler
scheduler.start();
Logger.info('Scraping scheduler started successfully');
// Log initial stats
const stats = scheduler.getStats();
Logger.info('Initial scheduler stats', stats);
} catch (error) {
Logger.error('Failed to start scraper', { error });
process.exit(1);
}
}
// Guard so the shutdown sequence runs at most once: the handler is
// registered for both SIGINT and SIGTERM, and without this flag a second
// signal (e.g. Ctrl+C pressed twice, or both signals delivered) would run
// the teardown concurrently and double-disconnect the database.
let isShuttingDown = false;

/**
 * Gracefully stop the scraper: shut down the scheduler (if it was started),
 * disconnect from the database, then exit 0. Exits 1 if teardown fails.
 * Idempotent — repeated invocations after the first are no-ops.
 */
const shutdown = async () => {
  if (isShuttingDown) {
    return;
  }
  isShuttingDown = true;
  try {
    if (scheduler) {
      await scheduler.shutdown();
      Logger.info('Scraping scheduler stopped');
    }
    await DatabaseConnection.getInstance().disconnect();
    Logger.database.disconnected();
    process.exit(0);
  } catch (error) {
    Logger.error('Error during scraper shutdown', { error });
    process.exit(1);
  }
};
// Register the graceful-shutdown hook for both termination signals.
for (const signal of ['SIGINT', 'SIGTERM'] as const) {
  process.on(signal, shutdown);
}

// Launch the scraper; a bootstrap failure is fatal for this process.
void initializeScraper().catch(error => {
  Logger.error('Failed to initialize scraper', { error });
  process.exit(1);
});