![]() Server : Apache/2 System : Linux server-15-235-50-60 5.15.0-164-generic #174-Ubuntu SMP Fri Nov 14 20:25:16 UTC 2025 x86_64 User : gositeme ( 1004) PHP Version : 8.2.29 Disable Function : exec,system,passthru,shell_exec,proc_close,proc_open,dl,popen,show_source,posix_kill,posix_mkfifo,posix_getpwuid,posix_setpgid,posix_setsid,posix_setuid,posix_setgid,posix_seteuid,posix_setegid,posix_uname Directory : /home/gositeme/domains/lavocat.quebec/public_html/scripts/ |
const { BarreauScraper } = require('../src/lib/barreau-scraper.js');
/**
 * Formats the share of successful profile scrapes as a percentage string.
 *
 * @param {number} imported - Number of profiles scraped without throwing.
 * @param {number} errors - Number of profiles whose scrape threw.
 * @returns {string} Percentage with one decimal (e.g. "66.7%"), or "N/A"
 *   when nothing was attempted, so the report never shows "NaN%".
 */
function formatSuccessRate(imported, errors) {
  const attempted = imported + errors;
  if (attempted === 0) {
    return 'N/A';
  }
  return `${((imported / attempted) * 100).toFixed(1)}%`;
}

/**
 * Smoke test for BarreauScraper.
 *
 * Initializes a scraper, replaces its `scrapeEntireDirectory` method with a
 * reduced version that loads only the first directory page and scrapes a
 * small sample of lawyer profiles, then prints a summary. Failures are
 * logged rather than rethrown, and `scraper.stop()` is always awaited at the
 * end.
 *
 * @returns {Promise<void>} Settles once the run and the cleanup have finished.
 */
async function testBarreauScraper() {
  const DIRECTORY_URL = 'https://www.barreau.qc.ca/fr/trouver-avocat';
  const SAMPLE_SIZE = 3; // Number of profiles scraped during the test run.
  const PROFILE_DELAY_MS = 2000; // Pause between two profile requests.

  console.log('๐งช Testing Barreau Scraper...\n');
  const scraper = new BarreauScraper();

  try {
    // Initialize the scraper
    console.log('1๏ธโฃ Initializing scraper...');
    await scraper.initialize();
    console.log('✅ Scraper initialized successfully\n');

    // Test with a small sample (just first page)
    console.log('2๏ธโฃ Testing with first page only...');

    // Override scrapeEntireDirectory on this instance only; a regular
    // function (not an arrow) is required so `this` is the scraper.
    scraper.scrapeEntireDirectory = async function () {
      console.log('๐ Starting test scraping (first page only)...');

      if (!this.browser || !this.page) {
        throw new Error('Scraper not initialized');
      }

      this.isRunning = true;
      this.progress = {
        totalPages: 1, // Force to 1 for testing
        currentPage: 0,
        totalLawyers: 0,
        importedLawyers: 0,
        errors: 0,
        startTime: Date.now(),
      };

      try {
        // Navigate to the main directory page
        await this.page.goto(DIRECTORY_URL, {
          waitUntil: 'networkidle2',
          timeout: 30000,
        });
        console.log('๐ Scraping page 1/1');

        // Wait for lawyer listings to load
        await this.page.waitForSelector('.lawyer-listing, .avocat-item, .result-item, .search-result', {
          timeout: 10000,
        });

        // Collect candidate profile links on this page
        const rawLinks = await this.page.evaluate(() => {
          const links = Array.from(document.querySelectorAll('a[href*="/avocat/"], a[href*="/lawyer/"], .lawyer-listing a, .avocat-item a, .search-result a'));
          return links.map(link => link.href).filter(href =>
            href.includes('/avocat/') ||
            href.includes('/lawyer/') ||
            href.includes('/repertoire/')
          );
        });

        // The same anchor can match several selectors and a listing can link
        // to one profile more than once; de-duplicate so the sample below
        // covers distinct profiles.
        const lawyerLinks = [...new Set(rawLinks)];
        console.log(`๐ Found ${lawyerLinks.length} lawyers on page 1`);

        // Test with the first few lawyers only
        const testLawyers = lawyerLinks.slice(0, SAMPLE_SIZE);
        console.log(`๐งช Testing with ${testLawyers.length} lawyers...`);

        for (const lawyerUrl of testLawyers) {
          try {
            console.log(`๐ Scraping: ${lawyerUrl}`);
            await this.scrapeLawyerProfile(lawyerUrl);
            this.progress.importedLawyers++;
            console.log('✅ Successfully scraped lawyer');

            // Add delay between lawyer profiles
            await this.delay(PROFILE_DELAY_MS);
          } catch (error) {
            // A single failing profile is counted and must not abort the run.
            console.error(`โ Error scraping lawyer profile ${lawyerUrl}:`, error.message);
            this.progress.errors++;
          }
        }

        console.log('๐ Test scraping completed!');
        this.printProgressReport();
      } catch (error) {
        console.error('โ Fatal error during test scraping:', error);
        throw error;
      } finally {
        this.isRunning = false;
      }
    };

    // Run the test
    await scraper.scrapeEntireDirectory();

    console.log('\n๐ฏ Test Results:');
    console.log('================');
    const progress = scraper.getProgress();
    console.log(`๐ Lawyers imported: ${progress.importedLawyers}`);
    console.log(`โ Errors: ${progress.errors}`);
    console.log(`๐ Success rate: ${formatSuccessRate(progress.importedLawyers, progress.errors)}`);
  } catch (error) {
    console.error('โ Test failed:', error);
  } finally {
    // Clean up
    console.log('\n๐งน Cleaning up...');
    await scraper.stop();
    console.log('✅ Test completed');
  }
}
// Entry point: start the smoke test and print any rejection that escapes it.
testBarreauScraper().catch((failure) => console.error(failure));