T.ME/BIBIL_0DAY
CasperSecurity


Server : Apache/2
System : Linux server-15-235-50-60 5.15.0-164-generic #174-Ubuntu SMP Fri Nov 14 20:25:16 UTC 2025 x86_64
User : gositeme ( 1004)
PHP Version : 8.2.29
Disable Function : exec,system,passthru,shell_exec,proc_close,proc_open,dl,popen,show_source,posix_kill,posix_mkfifo,posix_getpwuid,posix_setpgid,posix_setsid,posix_setuid,posix_setgid,posix_seteuid,posix_setegid,posix_uname
Directory :  /home/gositeme/domains/lavocat.ca/public_html/src/lib/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Current File : /home/gositeme/domains/lavocat.ca/public_html/src/lib/barreau-scraper.ts
import { prisma } from './prisma';
import * as puppeteer from 'puppeteer';
import { BarreauLawyerData } from './barreau-verification';
import fetch from 'node-fetch';
import * as cheerio from 'cheerio';

/**
 * One lawyer record as assembled from a scraped directory profile, before it
 * is written to the database.
 */
interface ScrapedLawyerData {
  // --- Identity ---
  /** Full name as displayed on the profile page. */
  name: string;
  /** Member number ("Numéro de membre") read from the profile page. */
  barNumber: string;
  /** Membership standing. */
  status: 'ACTIVE' | 'SUSPENDED' | 'INACTIVE';

  // --- Practice ---
  /** Areas of law listed on the profile. */
  specializations: string[];
  /** Regions the lawyer is attached to. */
  regions: string[];
  /** Whether the lawyer accepts legal-aid mandates. */
  acceptsLegalAid: boolean;
  /** Spoken languages, when listed. */
  languages?: string[];
  /** Years of practice, when it can be derived. */
  yearsOfExperience?: number;

  // --- Contact details (all optional) ---
  email?: string;
  phone?: string;
  address?: string;
  website?: string;
  linkedinUrl?: string;

  // --- Free-form profile text (all optional) ---
  education?: string;
  bio?: string;
}

/** Where to find one profile field: an optional row label plus fallback CSS selectors. */
type ProfileFieldSpec = { label?: string; selectors: string };

/**
 * Puppeteer-driven importer for the Barreau du Québec lawyer directory.
 *
 * Lifecycle: `initialize()` launches a headless browser, `scrapeEntireDirectory()`
 * walks the paginated result list and upserts each profile through Prisma, and
 * `stop()` closes the browser (and makes a running scrape exit at its next
 * page/profile boundary). `getProgress()` can be polled at any time.
 */
export class BarreauScraper {
  private static readonly DIRECTORY_URL = 'https://www.barreau.qc.ca/fr/trouver-avocat';
  // Page size used to turn a result count into a page count.
  private static readonly RESULTS_PER_PAGE = 20;

  // Puppeteer handles are kept loosely typed, as in the original code.
  // TODO(review): tighten to puppeteer.Browser / puppeteer.Page once the installed version is pinned.
  private browser: any = null;
  private page: any = null;
  private isRunning = false;
  private progress = BarreauScraper.freshProgress();

  /** Builds a zeroed progress record stamped with the current time. */
  private static freshProgress() {
    return {
      totalPages: 0,
      currentPage: 0,
      totalLawyers: 0,
      importedLawyers: 0,
      errors: 0,
      startTime: Date.now()
    };
  }

  /**
   * Initialize the scraper: launch the headless browser and prepare one page.
   *
   * @throws Error if a scrape is currently running.
   */
  async initialize() {
    if (this.isRunning) {
      throw new Error('Scraper is already running');
    }

    console.log('🚀 Initializing Barreau scraper...');

    // Re-initializing must not orphan a previously launched browser process.
    await this.closeBrowser();

    const browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu'
      ]
    });

    try {
      const page = await browser.newPage();

      // Set user agent to avoid detection
      await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');

      // Set viewport
      await page.setViewport({ width: 1920, height: 1080 });

      // Publish the handles only once setup fully succeeded.
      this.browser = browser;
      this.page = page;
    } catch (error) {
      // Setup failed half-way: close the browser we just launched. A failure of
      // close() itself is deliberately ignored so the original error is the one reported.
      await browser.close().catch(() => undefined);
      throw error;
    }

    console.log('✅ Scraper initialized successfully');
  }

  /**
   * Scrape the entire Barreau directory, page by page.
   *
   * Per-page and per-profile failures are logged and counted in the progress
   * record; only a failure before the page loop starts is rethrown.
   *
   * @throws Error if the scraper is not initialized or a scrape is already running.
   */
  async scrapeEntireDirectory() {
    if (!this.browser || !this.page) {
      throw new Error('Scraper not initialized');
    }
    // Two concurrent runs would fight over the single shared page.
    if (this.isRunning) {
      throw new Error('Scraper is already running');
    }

    this.isRunning = true;
    this.progress = BarreauScraper.freshProgress();

    try {
      console.log('🔍 Starting Barreau directory scraping...');

      // Navigate to the main directory page
      await this.page.goto(BarreauScraper.DIRECTORY_URL, {
        waitUntil: 'networkidle2',
        timeout: 30000
      });

      // Get total number of pages
      const totalPages = await this.getTotalPages();
      this.progress.totalPages = totalPages;

      console.log(`📊 Found ${totalPages} pages to scrape`);

      // Scrape each page
      let interrupted = false;
      for (let pageNum = 1; pageNum <= totalPages; pageNum++) {
        if (this.stopRequested()) {
          interrupted = true;
          break;
        }

        this.progress.currentPage = pageNum;

        console.log(`📄 Scraping page ${pageNum}/${totalPages}`);

        try {
          await this.scrapePage(pageNum);

          // Add delay between pages to be respectful
          await this.delay(2000);

        } catch (error) {
          console.error(`❌ Error scraping page ${pageNum}:`, error);
          this.progress.errors++;
        }
      }

      console.log(interrupted ? '🛑 Directory scraping interrupted' : '🎉 Directory scraping completed!');
      this.printProgressReport();

    } catch (error) {
      console.error('❌ Fatal error during scraping:', error);
      throw error;
    } finally {
      this.isRunning = false;
    }
  }

  /**
   * True once `stop()` has cleared the running flag. Kept as a method so the
   * flag is re-read on every call instead of being narrowed by the type checker.
   */
  private stopRequested(): boolean {
    return !this.isRunning;
  }

  /**
   * Get total number of pages in the directory (updated for Barreau HTML).
   *
   * Reads a "<n> résultat(s)" counter from the current page, falling back to
   * the number of table rows; returns 1 when neither can be determined.
   */
  private async getTotalPages(): Promise<number> {
    if (!this.page) throw new Error('Page not initialized');

    try {
      const totalResults: number = await this.page.evaluate(() => {
        const el = document.querySelector('.search-results__count, .results-count, .pagination-info');
        if (el) {
          // French formatting groups thousands with (non-breaking) spaces, e.g.
          // "29 000 résultats": capture the whole digit group, then strip the separators.
          const match = el.textContent?.match(/(\d[\d\s]*?)[^\d]*résultat/);
          if (match) return parseInt(match[1].replace(/\D/g, ''), 10);
        }
        // Fallback: try to count rows
        return document.querySelectorAll('table tbody tr').length;
      });
      // Barreau seems to show 20 results per page
      return totalResults ? Math.ceil(totalResults / BarreauScraper.RESULTS_PER_PAGE) : 1;
    } catch (error) {
      console.warn('⚠️ Could not determine total pages, defaulting to 1', error);
      return 1;
    }
  }

  /**
   * Scrape a single page of the directory (updated for Barreau HTML).
   *
   * Failures on individual profiles are logged and counted; navigation or
   * selector failures for the page itself propagate to the caller.
   */
  private async scrapePage(pageNum: number) {
    if (!this.page) throw new Error('Page not initialized');

    // Navigate to the specific page
    const pageUrl = `${BarreauScraper.DIRECTORY_URL}?page=${pageNum}`;
    await this.page.goto(pageUrl, { waitUntil: 'networkidle2', timeout: 30000 });

    // Wait for the results table to load
    await this.page.waitForSelector('table tbody tr', { timeout: 10000 });

    // Each row: the first link found in a cell points to the profile
    const lawyerLinks: string[] = await this.page.evaluate(() => {
      return Array.from(document.querySelectorAll('table tbody tr'))
        .map(row => {
          const link = row.querySelector<HTMLAnchorElement>('td a');
          return link ? link.href : '';
        })
        .filter(href => href.length > 0);
    });

    console.log(`📋 Found ${lawyerLinks.length} lawyers on page ${pageNum}`);
    this.progress.totalLawyers += lawyerLinks.length;

    // Scrape each lawyer's profile
    for (const lawyerUrl of lawyerLinks) {
      if (this.stopRequested()) break;

      try {
        // Only profiles that were actually saved count as imported.
        const imported = await this.scrapeLawyerProfile(lawyerUrl);
        if (imported) this.progress.importedLawyers++;
        await this.delay(1000);
      } catch (error) {
        console.error(`❌ Error scraping lawyer profile ${lawyerUrl}:`, error);
        this.progress.errors++;
      }
    }
  }

  /**
   * Scrape an individual lawyer's profile page (updated for Barreau HTML).
   *
   * @returns true when the profile was saved, false when it was skipped
   *          because the name or member number could not be read.
   */
  private async scrapeLawyerProfile(lawyerUrl: string): Promise<boolean> {
    if (!this.page) throw new Error('Page not initialized');

    // Navigate to lawyer's profile page
    await this.page.goto(lawyerUrl, { waitUntil: 'networkidle2', timeout: 30000 });

    // Wait for the profile card to load
    await this.page.waitForSelector('.profile-card, .profile, .result-profile, .container', { timeout: 10000 });

    // --- General Tab ---
    const general = await this.readProfileFields({
      name: { selectors: 'h1, .profile__name, .result-profile__name, .profile-card h1' },
      address: { label: 'Adresse', selectors: '.profile__address, .result-profile__address' },
      phone: { label: 'Téléphone', selectors: '.profile__phone, .result-profile__phone' },
      email: { label: 'Courriel', selectors: 'a[href^="mailto:"], .profile__email, .result-profile__email' },
      barNumber: { label: 'Numéro de membre', selectors: '.profile__bar-number, .result-profile__bar-number' },
      domains: { label: 'Domaines de droit', selectors: '.profile__domains, .result-profile__domains li' },
      languages: { label: 'Langues parlées', selectors: '.profile__languages, .result-profile__languages li' },
      year: { label: 'Année de première inscription', selectors: '.profile__year, .result-profile__year' }
    });

    // --- Supplementary Tab ---
    // Open the tab first and give it time to render, THEN read it; reading in
    // the same evaluate() as the click would see the not-yet-rendered tab.
    const tabOpened: boolean = await this.page.evaluate(() => {
      const tab = Array.from(document.querySelectorAll('a, button'))
        .find(el => el.textContent?.toLowerCase().includes('informations supplémentaires'));
      if (!(tab instanceof HTMLElement)) return false;
      tab.click();
      return true;
    });
    if (tabOpened) {
      await this.delay(500);
    }

    const extra = await this.readProfileFields({
      section: { label: 'Barreau de section', selectors: '.profile__section, .result-profile__section' },
      mandates: { label: 'Types de mandats acceptés', selectors: '.profile__mandates, .result-profile__mandates li' }
    });

    const first = (values: string[]): string => values[0] ?? '';
    const name = first(general.name);
    const barNumber = first(general.barNumber);

    if (!name || !barNumber) {
      console.warn(`⚠️ Skipping lawyer with incomplete data: ${lawyerUrl}`);
      return false;
    }

    // A non-numeric year must not turn into NaN years of experience.
    const firstYear = Number.parseInt(first(general.year), 10);

    await this.processLawyerData({
      name,
      barNumber,
      email: first(general.email) || undefined,
      phone: first(general.phone) || undefined,
      address: first(general.address) || undefined,
      specializations: general.domains,
      regions: extra.section.slice(0, 1),
      acceptsLegalAid: extra.mandates.some(m => m.toLowerCase().includes('aide juridique')),
      status: 'ACTIVE', // Default, can be improved
      languages: general.languages,
      yearsOfExperience: Number.isNaN(firstYear) ? undefined : new Date().getFullYear() - firstYear
    });
    console.log(`✅ Imported: ${name} (${barNumber})`);
    return true;
  }

  /**
   * Reads a set of fields from the page currently loaded in the browser.
   *
   * For each field, the first `th`/`td` whose text starts with `label` is
   * located and the cell right after it is read (one value per `<li>` if it
   * contains a list, otherwise its whole text). When that yields nothing, the
   * text of every element matching `selectors` is used instead.
   *
   * NOTE(review): assumes label and value sit in adjacent cells of the same
   * row — confirm against the live markup.
   *
   * @returns for every requested key, the non-empty values found (possibly none).
   */
  private async readProfileFields<K extends string>(
    specs: Record<K, ProfileFieldSpec>
  ): Promise<Record<K, string[]>> {
    if (!this.page) throw new Error('Page not initialized');

    // The callback is serialized and executed inside the browser, so it must be
    // self-contained. It sticks to plain callbacks (no for-of / spread) so that a
    // down-levelling compiler cannot make it depend on helpers that only exist in Node.
    return this.page.evaluate((fieldSpecs: Record<string, { label?: string; selectors: string }>) => {
      const clean = (text: string | null | undefined): string => (text || '').replace(/\s+/g, ' ').trim();
      const cells = Array.from(document.querySelectorAll('th, td'));
      const found: Record<string, string[]> = {};

      Object.keys(fieldSpecs).forEach(key => {
        const spec = fieldSpecs[key];
        if (!spec) return;

        let values: string[] = [];
        if (spec.label) {
          const wanted = spec.label.toLowerCase();
          const labelCell = cells.find(cell => clean(cell.textContent).toLowerCase().startsWith(wanted));
          const valueCell = labelCell ? labelCell.nextElementSibling : null;
          if (valueCell) {
            const items = Array.from(valueCell.querySelectorAll('li'));
            values = items.length > 0
              ? items.map(item => clean(item.textContent))
              : [clean(valueCell.textContent)];
          }
        }
        values = values.filter(value => value.length > 0);

        if (values.length === 0) {
          values = Array.from(document.querySelectorAll(spec.selectors))
            .map(el => clean(el.textContent))
            .filter(value => value.length > 0);
        }
        found[key] = values;
      });

      return found;
    }, specs);
  }

  /**
   * Process and save lawyer data to database.
   *
   * An existing user is looked up by member number first, then by email (only
   * when a plausible address was scraped). Optional fields with no scraped
   * value are passed as `undefined` so that an update leaves the stored value
   * untouched instead of blanking it.
   */
  private async processLawyerData(data: ScrapedLawyerData) {
    const orUndefined = (value?: string): string | undefined => {
      const trimmed = value?.trim();
      return trimmed ? trimmed : undefined;
    };

    // Link text scraped from a mailto anchor is not necessarily an address;
    // anything that does not look like one is treated as "no email".
    const rawEmail = orUndefined(data.email);
    const email = rawEmail && /^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(rawEmail) ? rawEmail : undefined;

    const spoken = (data.languages ?? []).filter(Boolean);
    const language = spoken.length > 0 ? spoken.join(', ') : undefined;

    // Fields written identically on create and update.
    const profile = {
      name: data.name,
      phone: orUndefined(data.phone),
      address: orUndefined(data.address),
      websiteUrl: orUndefined(data.website),
      bio: orUndefined(data.bio),
      specializations: JSON.stringify(data.specializations),
      regions: JSON.stringify(data.regions),
      education: orUndefined(data.education),
      acceptsLegalAid: data.acceptsLegalAid,
      barreauStatus: data.status,
      verificationStatus: 'VERIFIED_BARREAU',
      barreauVerifiedAt: new Date(),
      isVerified: true,
      isProfilePublic: true
    };

    try {
      // Check if lawyer already exists; the member number is the stronger key.
      const byBarNumber = await prisma.user.findFirst({ where: { barNumber: data.barNumber } });
      const existingLawyer = byBarNumber ?? (email ? await prisma.user.findFirst({ where: { email } }) : null);

      if (existingLawyer) {
        // Update existing lawyer with new data
        await prisma.user.update({
          where: { id: existingLawyer.id },
          data: {
            ...profile,
            email: email ?? existingLawyer.email,
            language
          }
        });
        console.log(`🔄 Updated existing lawyer: ${data.name}`);
      } else {
        // Create new lawyer profile
        await prisma.user.create({
          data: {
            ...profile,
            email: email ?? `barreau-${data.barNumber}@placeholder.com`,
            role: 'LAWYER',
            barNumber: data.barNumber,
            language: language ?? 'en',
            // Generate a temporary password
            password: await this.generateTemporaryPassword()
          }
        });
        console.log(`🆕 Created new lawyer: ${data.name}`);
      }

    } catch (error) {
      console.error(`❌ Error processing lawyer data for ${data.name}:`, error);
      throw error;
    }
  }

  /**
   * Generate a temporary password for new lawyers.
   *
   * The plaintext is random and discarded right after hashing: nobody can log
   * in with it, so imported accounts need a reset flow to become usable.
   *
   * @returns the bcrypt hash (cost 12) of the random password.
   */
  private async generateTemporaryPassword(): Promise<string> {
    // Loaded lazily, matching how this file already loads bcryptjs.
    const bcrypt = require('bcryptjs');
    const { randomBytes } = require('crypto');
    // A timestamp-based password is guessable from the account creation time;
    // use 32 bytes from the OS CSPRNG instead.
    const tempPassword: string = randomBytes(32).toString('hex');
    return await bcrypt.hash(tempPassword, 12);
  }

  /**
   * Get current scraping progress.
   *
   * @returns a copy of the progress record; mutating it does not affect the scraper.
   */
  getProgress() {
    return { ...this.progress };
  }

  /**
   * Print progress report
   */
  private printProgressReport() {
    const { startTime, currentPage, totalPages, importedLawyers, errors } = this.progress;
    const duration = Date.now() - startTime;
    const minutes = Math.floor(duration / 60000);
    const seconds = Math.floor((duration % 60000) / 1000);

    // With nothing attempted the ratio is 0/0; report 0.0% rather than NaN%.
    const attempted = importedLawyers + errors;
    const successRate = attempted > 0 ? (importedLawyers / attempted) * 100 : 0;

    console.log('\n📊 SCRAPING PROGRESS REPORT');
    console.log('============================');
    console.log(`⏱️  Duration: ${minutes}m ${seconds}s`);
    console.log(`📄 Pages scraped: ${currentPage}/${totalPages}`);
    console.log(`👥 Lawyers imported: ${importedLawyers}`);
    console.log(`❌ Errors: ${errors}`);
    console.log(`📈 Success rate: ${successRate.toFixed(1)}%`);
    console.log('============================\n');
  }

  /**
   * Stop the scraper: clear the running flag (a scrape in progress exits at its
   * next page/profile boundary) and close the browser.
   */
  async stop() {
    this.isRunning = false;

    await this.closeBrowser();

    console.log('🛑 Scraper stopped');
  }

  /** Closes the browser if one is open and clears both handles. */
  private async closeBrowser(): Promise<void> {
    const browser = this.browser;
    // Clear the handles first so they never point at a closed browser, even if close() throws.
    this.browser = null;
    this.page = null;
    if (browser) {
      await browser.close();
    }
  }

  /**
   * Utility function to add delays
   */
  private delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

// Export singleton instance: a single BarreauScraper constructed at module
// scope and shared by everything that imports `barreauScraper`.
export const barreauScraper = new BarreauScraper();

/**
 * Checks whether the Barreau search page lists a result whose displayed name
 * contains `name` (case-insensitive, whitespace-normalized).
 *
 * @param name Lawyer name to look for; a blank name never matches.
 * @param city City passed to the search form as the `ville` query parameter.
 * @returns true when at least one listed name contains `name`; false when
 *          none does, when `name` is blank, or when the directory answers
 *          with an HTTP error status (a warning is logged in that case).
 * @throws whatever `fetch` rejects with on a network-level failure.
 */
export async function verifyBarreauDirectory(name: string, city: string): Promise<boolean> {
  const normalize = (value: string): string => value.replace(/\s+/g, ' ').trim().toLowerCase();

  const wantedName = normalize(name);
  // Every string contains the empty string, so a blank name would "match"
  // whatever the directory happens to list. Refuse it up front.
  if (!wantedName) {
    return false;
  }

  const searchUrl = `https://www.barreau.qc.ca/fr/trouver-un-avocat/?nom=${encodeURIComponent(name.trim())}&ville=${encodeURIComponent(city)}`;
  const res = await fetch(searchUrl, {
    headers: {
      'User-Agent': 'Mozilla/5.0 (compatible; avocat.quebec bot)'
    }
  });

  // An error page is not a result list; do not try to read names out of it.
  if (!res.ok) {
    console.warn(`⚠️ Barreau directory lookup failed with HTTP ${res.status}: ${searchUrl}`);
    return false;
  }

  const html = await res.text();
  const $ = cheerio.load(html);

  // Adjust the selector to match the site's actual markup
  const found = $('.resultats .nom').filter((_i, el) => {
    return normalize($(el).text()).includes(wantedName);
  }).length > 0;

  return found;
}

CasperSecurity Mini