![]() Server : Apache/2 System : Linux server-15-235-50-60 5.15.0-164-generic #174-Ubuntu SMP Fri Nov 14 20:25:16 UTC 2025 x86_64 User : gositeme ( 1004) PHP Version : 8.2.29 Disable Function : exec,system,passthru,shell_exec,proc_close,proc_open,dl,popen,show_source,posix_kill,posix_mkfifo,posix_getpwuid,posix_setpgid,posix_setsid,posix_setuid,posix_setgid,posix_seteuid,posix_setegid,posix_uname Directory : /home/gositeme/domains/lavocat.quebec/private_html/scripts/ |
/**
* SCRAPE ADW AVOCATS PROFILE PICTURES SCRIPT
* ==========================================
*
* Purpose: Scrape the ADW Avocats website to find actual profile picture URLs
*
* What it does:
* 1. Scrapes the ADW Avocats team page
* 2. Extracts actual profile picture URLs from the HTML
* 3. Downloads and uploads the images
* 4. Updates user profiles with the profile picture URLs
*
* Usage:
* node scripts/scrape-adw-profile-pictures.js
*
* Dependencies:
* - Prisma client
* - axios (for HTTP requests)
* - cheerio (for HTML parsing)
* - fs (for file operations)
* - path (for file paths)
*
* Created: 2024-01-27
*/
const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream/promises');
const { PrismaClient } = require('@prisma/client');
const axios = require('axios');
const cheerio = require('cheerio');
const prisma = new PrismaClient();
/**
 * Lower-case a string and strip combining diacritics so that, for example,
 * "Jérôme" and "jerome" compare equal.
 *
 * @param {string} text - Raw text (alt attribute, surrounding text, keyword).
 * @returns {string} Accent-free, lower-cased text.
 */
function normalizeForMatching(text) {
  return String(text)
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase();
}

/**
 * Split text into a set of whole, normalized words.
 *
 * Apostrophes are dropped before splitting so "M'Mah" becomes the single
 * word "mmah"; every other non-alphanumeric character separates words.
 *
 * @param {string} text - Raw text to tokenize.
 * @returns {Set<string>} Distinct normalized words found in the text.
 */
function tokenizeForMatching(text) {
  const words = normalizeForMatching(text)
    .replace(/['\u2019]/g, '')
    .split(/[^a-z0-9]+/)
    .filter(Boolean);
  return new Set(words);
}

/**
 * Pick the image whose words contain the most of the given keywords.
 *
 * Keywords are compared as whole words, so "justin" does not match
 * "justine". Ties keep the earliest image; an image with no keyword hit is
 * never returned.
 *
 * @param {string[]} keywords - Name fragments identifying one team member.
 * @param {Array<{src: string, words: Set<string>}>} images - Candidate images.
 * @returns {{src: string, words: Set<string>}|null} Best candidate, or null.
 */
function findBestImageForKeywords(keywords, images) {
  // Normalizing collapses accent variants ("jérôme"/"jerome") into one entry,
  // so spelling alternatives are not counted as two separate hits.
  const wanted = new Set(
    keywords
      .map((keyword) => normalizeForMatching(keyword).replace(/[^a-z0-9]+/g, ''))
      .filter(Boolean)
  );

  let best = null;
  let bestHits = 0;
  for (const image of images) {
    let hits = 0;
    for (const keyword of wanted) {
      if (image.words.has(keyword)) {
        hits += 1;
      }
    }
    if (hits > bestHits) {
      best = image;
      bestHits = hits;
    }
  }
  return best;
}

/**
 * Scrape the ADW Avocats "à propos" page, match the images found there to the
 * known team members, and hand the matches to downloadAndUploadImages.
 *
 * Any error raised while scraping or matching is logged rather than
 * re-thrown; the Prisma connection is always closed before returning.
 *
 * @returns {Promise<void>}
 */
async function scrapeADWProfilePictures() {
  console.log('🔍 Scraping ADW Avocats website for profile pictures...\n');
  const teamPageUrl = 'https://www.adwavocats.com/a-propos/';
  try {
    // Fetch the team page
    const response = await axios.get(teamPageUrl);
    const $ = cheerio.load(response.data);

    // Collect every image that looks like a lawyer's profile picture
    const profileImages = [];
    $('img').each((index, element) => {
      const src = $(element).attr('src');
      const alt = $(element).attr('alt') || '';
      const parentText = $(element).parent().text() || '';

      if (!src || !(src.includes('wp-content') || src.includes('uploads'))) {
        return;
      }
      const describesLawyer = [alt, parentText].some((text) => {
        const lowered = text.toLowerCase();
        return lowered.includes('avocat') || lowered.includes('lawyer');
      });
      if (!describesLawyer) {
        return;
      }

      // Resolve against the page URL so root-relative, path-relative and
      // protocol-relative ("//host/...") sources all become absolute URLs.
      let absoluteSrc;
      try {
        absoluteSrc = new URL(src, teamPageUrl).href;
      } catch (urlError) {
        console.log(`⚠️ Skipping image with unparseable src: ${src}`);
        return;
      }

      profileImages.push({
        src: absoluteSrc,
        alt: alt,
        parentText: parentText.trim(),
        words: tokenizeForMatching(`${alt} ${parentText}`)
      });
    });

    console.log(`Found ${profileImages.length} potential profile images:`);
    profileImages.forEach((img, index) => {
      console.log(`${index + 1}. ${img.alt} - ${img.src}`);
    });

    // Map team members to their images
    const teamMapping = {
      'justin.wee@adwavocats.com': { name: 'Justin Wee', keywords: ['justin', 'wee'] },
      'alain.arsenault@adwavocats.com': { name: 'Alain Arsenault', keywords: ['alain', 'arsenault'] },
      'virginie.dufresne-lemire@adwavocats.com': { name: 'Virginie Dufresne-Lemire', keywords: ['virginie', 'dufresne'] },
      'jerome.aucoin@adwavocats.com': { name: 'Jérôme Aucoin', keywords: ['jérôme', 'jerome', 'aucoin'] },
      'audrey.labrecque@adwavocats.com': { name: 'Audrey Labrecque', keywords: ['audrey', 'labrecque'] },
      'ivan.lazarov@adwavocats.com': { name: 'Ivan Lazarov', keywords: ['ivan', 'lazarov'] },
      'yalda.machouf-khadir@adwavocats.com': { name: 'Yalda Machouf Khadir', keywords: ['yalda', 'machouf'] },
      'olivia.malenfant@adwavocats.com': { name: 'Olivia Malenfant', keywords: ['olivia', 'malenfant'] },
      'imane.melab@adwavocats.com': { name: 'Imane Melab', keywords: ['imane', 'melab'] },
      'justine.monty@adwavocats.com': { name: 'Justine Monty', keywords: ['justine', 'monty'] },
      'mmah.nora.toure@adwavocats.com': { name: 'M\'Mah Nora Touré', keywords: ['mmah', 'nora', 'touré', 'toure'] }
    };

    // Match images to team members (best whole-word keyword overlap wins)
    const matchedImages = {};
    for (const [email, member] of Object.entries(teamMapping)) {
      const bestImage = findBestImageForKeywords(member.keywords, profileImages);
      if (bestImage) {
        matchedImages[email] = bestImage.src;
        console.log(`✅ Matched ${member.name} with image: ${bestImage.src}`);
      }
    }

    const matchCount = Object.keys(matchedImages).length;
    console.log(`\n📊 Found ${matchCount} matches out of ${Object.keys(teamMapping).length} team members`);

    // Download and upload matched images
    if (matchCount > 0) {
      await downloadAndUploadImages(matchedImages);
    } else {
      console.log('❌ No profile pictures found. Using generated avatars instead.');
    }
  } catch (error) {
    console.error('❌ Error scraping website:', error.message);
    console.log('❌ Falling back to generated avatars.');
  } finally {
    await prisma.$disconnect();
  }
}
/**
 * Download each matched image into public/uploads/profiles and store its
 * public path on the corresponding user record.
 *
 * Members are processed one at a time. A failure for one member is logged and
 * does not stop the remaining members from being processed.
 *
 * @param {Object<string, string>} matchedImages - Map of user email to the
 *   absolute URL of that user's profile picture.
 * @returns {Promise<void>}
 */
async function downloadAndUploadImages(matchedImages) {
  console.log('\n📥 Downloading and uploading matched images...\n');
  const uploadDir = path.join(__dirname, '../public/uploads/profiles');

  // A recursive mkdir succeeds when the directory already exists, so no
  // separate existence check is needed.
  fs.mkdirSync(uploadDir, { recursive: true });

  for (const [email, imageUrl] of Object.entries(matchedImages)) {
    try {
      console.log(`📥 Processing ${email}...`);

      // Generate filename
      const filename = `${email.replace('@', '_').replace(/\./g, '_')}.jpg`;
      const filepath = path.join(uploadDir, filename);
      // Write to a temporary name first so an interrupted download never
      // leaves a truncated image at the path the site serves.
      const partialPath = `${filepath}.part`;

      // Download image
      const response = await axios({
        method: 'GET',
        url: imageUrl,
        responseType: 'stream'
      });

      try {
        // pipeline rejects when either the HTTP stream or the file stream
        // fails; a bare pipe() would leave this await pending forever if the
        // source stream errored.
        await pipeline(response.data, fs.createWriteStream(partialPath));
        await fs.promises.rename(partialPath, filepath);
      } catch (downloadError) {
        // Best-effort cleanup: the download error is the one worth reporting,
        // so a failure to remove the partial file is deliberately ignored.
        await fs.promises.rm(partialPath, { force: true }).catch(() => {});
        throw downloadError;
      }

      // Update user profile in database
      const publicUrl = `/uploads/profiles/${filename}`;
      await prisma.user.update({
        where: { email: email },
        data: { profilePicture: publicUrl }
      });
      console.log(` ✅ Successfully updated ${email} with profile picture: ${publicUrl}`);
    } catch (error) {
      console.log(` ❌ Failed to process ${email}: ${error.message}`);
    }
  }
  console.log('\n🎉 Profile picture download and upload completed!');
}
// Script entry point: start the scrape and print any rejection that escapes it.
scrapeADWProfilePictures().catch((error) => {
  console.error(error);
});