![]() Server : Apache/2 System : Linux server-15-235-50-60 5.15.0-164-generic #174-Ubuntu SMP Fri Nov 14 20:25:16 UTC 2025 x86_64 User : gositeme ( 1004) PHP Version : 8.2.29 Disable Function : exec,system,passthru,shell_exec,proc_close,proc_open,dl,popen,show_source,posix_kill,posix_mkfifo,posix_getpwuid,posix_setpgid,posix_setsid,posix_setuid,posix_setgid,posix_seteuid,posix_setegid,posix_uname Directory : /home/gositeme/domains/lavocat.quebec/private_html/scripts/ |
const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs');
const path = require('path');
const https = require('https');
// Origin of the site that is scraped; also used to absolutise relative image URLs.
const BASE_URL = 'https://www.adwavocats.com';
// Page that is fetched and searched for lawyer portraits.
const TEAM_URL = BASE_URL + '/a-propos/';
// Directory (relative to this script) where downloaded images are written.
const OUTPUT_DIR = path.join(__dirname, '../public/images/lawyers');
// Slugs of the lawyers whose images the script looks for and downloads.
// Each slug is compared against slugified <img> alt text, used in the raw-HTML
// fallback search, and reused as the base name of the downloaded file.
const expectedLawyers = [
'justin-wee',
'alain-arsenault',
'audrey-labrecque',
'marie-claude-tremblay',
'david-chen',
'sophie-dubois',
'marc-andre-bouchard',
'isabella-rodriguez',
'thomas-leblanc',
'virginie-dufresne-lemire',
'jerome-aucoin',
'antoine-duranleau-hendrickx',
'ivan-lazarov',
'yalda-machouf-khadir',
'olivia-malenfant',
'imane-melab',
'justine-monty',
'mmah-nora-toure'
];
/**
 * Converts a display name into a lowercase, hyphen-separated ASCII slug.
 *
 * Accented letters are reduced to their base letter via Unicode decomposition,
 * so every diacritic is handled (not just a fixed list) and both precomposed
 * and already-decomposed input produce the same slug. Apostrophes are dropped
 * so that "M'mah" becomes "mmah"; every other run of non-alphanumeric
 * characters collapses to a single hyphen, and leading/trailing hyphens are
 * trimmed.
 *
 * @param {string} str - Text to slugify.
 * @returns {string} The slug; empty when `str` contains no ASCII letters or digits.
 */
function slugify(str) {
  return str
    // Split accented letters into base letter + combining mark, then drop the marks.
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    // Straight and typographic apostrophes are removed rather than hyphenated.
    .replace(/['\u2019]/g, '')
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');
}
/**
 * Downloads `url` over HTTPS and stores the body as `filename` inside OUTPUT_DIR.
 *
 * The destination file is only opened once a 200 response has arrived, so a
 * failed request neither leaves an empty file behind nor truncates a file from
 * an earlier run. Redirects (3xx with a Location header) are followed up to a
 * small fixed limit because `https.get` does not follow them itself.
 *
 * @param {string} url - Absolute https URL of the image.
 * @param {string} filename - File name (not a path) to write inside OUTPUT_DIR.
 * @returns {Promise<void>} Resolves once the file has been written and closed.
 *   Rejects with an Error on network failure, non-200 status, too many
 *   redirects, or a file-system write error.
 */
async function downloadImage(url, filename) {
  const MAX_REDIRECTS = 5;
  const filePath = path.join(OUTPUT_DIR, filename);

  return new Promise((resolve, reject) => {
    // Stays null until a successful response arrives and the file is opened.
    let file = null;

    const fail = (err) => {
      // Only clean up a file this call actually started writing.
      if (file) {
        file.destroy();
        fs.unlink(filePath, () => {});
      }
      reject(err instanceof Error ? err : new Error(String(err)));
    };

    const fetchFrom = (target, redirectsLeft) => {
      let request;
      try {
        request = https.get(target, (response) => {
          const { statusCode, headers } = response;

          if (statusCode >= 300 && statusCode < 400 && headers.location) {
            // Drain the redirect body so the socket is released.
            response.resume();
            if (redirectsLeft <= 0) {
              fail(new Error(`Too many redirects while fetching ${url}`));
              return;
            }
            let next;
            try {
              // Location may be relative; resolve it against the current URL.
              next = new URL(headers.location, target).href;
            } catch (err) {
              fail(err);
              return;
            }
            fetchFrom(next, redirectsLeft - 1);
            return;
          }

          if (statusCode !== 200) {
            response.resume();
            fail(new Error(`Unexpected HTTP status ${statusCode} for ${target}`));
            return;
          }

          file = fs.createWriteStream(filePath);
          file.on('error', fail);
          file.on('finish', () => {
            file.close((err) => (err ? fail(err) : resolve()));
          });
          // pipe() does not forward source errors, so watch the response too.
          response.on('error', fail);
          response.pipe(file);
        });
      } catch (err) {
        // https.get throws synchronously for e.g. a non-https protocol.
        fail(err);
        return;
      }
      request.on('error', fail);
    };

    fetchFrom(url, MAX_REDIRECTS);
  });
}
// Matches a URL under wp-content that ends in a supported image extension,
// optionally followed by a query string.
const UPLOADED_IMAGE_PATTERN = /wp-content.*\.(?:webp|jpg|jpeg|png)(?:\?.*)?$/i;

/**
 * Removes honorifics ("Me", "Me.", "Mme.", "M."), the word "Avocat(e)(s)" and
 * parenthesised text from an image alt attribute, leaving the person's name.
 *
 * Titles are only removed when they are not preceded by a letter, so the
 * "me " inside a name such as "Jérôme Aucoin" is left intact.
 */
function extractNameFromAlt(alt) {
  return alt
    .replace(/\([^)]*\)/g, ' ')
    .replace(/(?<!\p{L})(?:(?:Mme|Me|M)\.|Me(?=\s)|Avocate?s?(?!\p{L}))/giu, ' ')
    .trim();
}

/**
 * Picks the best image URL from an <img>: a .webp srcset candidate first, then
 * a .webp src / data-src, then plain src / data-src. Candidates that are not
 * wp-content images (e.g. lazy-load placeholders) are skipped.
 * Returns '' when nothing qualifies.
 */
function pickImageSource($img) {
  const src = $img.attr('src') || '';
  const dataSrc = $img.attr('data-src') || '';
  const srcset = $img.attr('srcset') || '';

  // srcset entries look like "url 300w, url 600w"; trim before taking the URL part.
  const webpFromSrcset = srcset
    .split(',')
    .map((entry) => entry.trim().split(/\s+/)[0])
    .find((candidate) => candidate.includes('.webp'));

  const candidates = [
    webpFromSrcset,
    src.endsWith('.webp') ? src : '',
    dataSrc.endsWith('.webp') ? dataSrc : '',
    src,
    dataSrc,
  ];
  return candidates.find((candidate) => candidate && UPLOADED_IMAGE_PATTERN.test(candidate)) || '';
}

/** Resolves a possibly relative or protocol-relative URL against the team page; null if empty or invalid. */
function toAbsoluteUrl(raw) {
  if (!raw) {
    return null;
  }
  try {
    return new URL(raw, TEAM_URL).href;
  } catch (err) {
    return null;
  }
}

// Entry point: fetch the team page, map each expected lawyer slug to an image
// URL, then download the images one at a time into OUTPUT_DIR.
(async () => {
  // With `recursive`, mkdirSync is a no-op when the directory already exists.
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });

  const { data: html } = await axios.get(TEAM_URL);
  const $ = cheerio.load(html);
  const found = new Map();

  // 1. Try to get images from <img> tags, keyed by the slugified alt text.
  $('img').each((i, el) => {
    const $img = $(el);
    const slug = slugify(extractNameFromAlt($img.attr('alt') || ''));
    if (!slug) {
      return;
    }
    const absoluteUrl = toAbsoluteUrl(pickImageSource($img));
    if (absoluteUrl) {
      found.set(slug, absoluteUrl);
    }
  });

  // 2. Fallback: search the raw HTML for an image URL containing the slug.
  for (const slug of expectedLawyers) {
    if (found.has(slug)) {
      continue;
    }
    // Capture the whole URL token (including any scheme/host before wp-content)
    // and stop at quotes or whitespace so one match cannot span several URLs.
    const pattern = new RegExp(
      `[^"'\\s(),<>]*wp-content[^"'\\s]*${slug}[^"'\\s]*\\.(?:webp|jpg|jpeg|png)`,
      'i'
    );
    const match = html.match(pattern);
    const absoluteUrl = match ? toAbsoluteUrl(match[0]) : null;
    if (absoluteUrl) {
      found.set(slug, absoluteUrl);
    }
  }

  // 3. Download all found images sequentially; one failure does not stop the rest.
  for (const slug of expectedLawyers) {
    const imageUrl = found.get(slug);
    if (!imageUrl) {
      console.log(`No image found for: ${slug}`);
      continue;
    }
    // Take the extension from the path only, ignoring any query string or fragment.
    const filename = slug + path.extname(new URL(imageUrl).pathname);
    try {
      await downloadImage(imageUrl, filename);
      console.log(`Downloaded: ${filename}`);
    } catch (e) {
      console.error(`Failed to download ${filename}:`, e);
    }
  }
})().catch((err) => {
  // Failures outside the per-image try/catch (e.g. fetching the team page) land here.
  console.error('Failed to scrape lawyer images:', err);
  process.exitCode = 1;
});