/**
 * Organize Pokémon Resources
 *
 * Creates folder structure and renames files based on the order
 * they appear on the pokemon.com resources page
 *
 * Usage:
 *   node code/utils/organize-pokemon-resources.js
 *   npm run organize:pokemon
 */

import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

puppeteer.use(StealthPlugin());

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

const BASE_URL =
  'https://www.pokemon.com/us/play-pokemon/about/tournaments-rules-and-resources';
const RESOURCES_DIR = path.resolve(
  __dirname,
  '../../docs/projects/pokemon-professor/Pokemon Rules & Resources'
);

/**
 * Clean filename for filesystem.
 *
 * Replaces the characters < > : " / \ | ? * with '-', collapses every run of
 * whitespace into a single space and trims both ends.
 *
 * @param {string} name - Raw name (section title or link text).
 * @returns {string} The sanitized name.
 */
function sanitizeFilename(name) {
  return name
    .replace(/[<>:"/\\|?*]/g, '-')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Format a 1-based index as a zero-padded, two-character string.
 *
 * @param {number} index - Index to format.
 * @returns {string} e.g. 3 -> "03".
 */
function padIndex(index) {
  return index.toString().padStart(2, '0');
}

/**
 * Pick the file that best matches a resource name.
 *
 * An exact match on the file name without its .pdf/.txt extension wins.
 * Only when there is no exact match does the first file whose stem contains
 * the resource name (or is contained in it) get returned. Preferring the
 * exact match keeps a shorter, similarly named file from being picked just
 * because it appears earlier in the list.
 *
 * @param {string} resourceName - Sanitized link text.
 * @param {string[]} candidates - File names that are still unassigned.
 * @returns {string|undefined} The matching file name, or undefined.
 */
function findMatchingFile(resourceName, candidates) {
  const stems = candidates.map(file => ({
    file,
    stem: file.replace(/\.(pdf|txt)$/, '')
  }));

  const exact = stems.find(({ stem }) => stem === resourceName);
  if (exact) {
    return exact.file;
  }

  // An empty stem would be "contained" in every name, so it never matches.
  const fuzzy = stems.find(
    ({ stem }) =>
      stem.length > 0 &&
      (stem.includes(resourceName) || resourceName.includes(stem))
  );
  return fuzzy?.file;
}

/**
 * Main organization function.
 *
 * Scrapes BASE_URL for section headers and the links that follow them,
 * creates one numbered folder per section inside RESOURCES_DIR, moves each
 * matching .pdf/.txt file into its section folder under a numbered name, and
 * finally moves whatever is left into a "99-Other" folder.
 *
 * Failures are logged and reported through process.exitCode so that the
 * browser is still closed before the process ends.
 *
 * @returns {Promise<void>}
 */
async function organizeResources() {
  console.log('🚀 Starting Pokémon Resources Organization');
  console.log(`📁 Resources directory: ${RESOURCES_DIR}\n`);

  let browser;

  try {
    // Fail fast: there is no point in scraping when there is nothing to organize.
    if (
      !fs.existsSync(RESOURCES_DIR) ||
      !fs.statSync(RESOURCES_DIR).isDirectory()
    ) {
      throw new Error(`Resources directory not found: ${RESOURCES_DIR}`);
    }

    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled'
      ]
    });

    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    );

    console.log('🌐 Loading main page to extract structure...');
    await page.goto(BASE_URL, {
      waitUntil: 'networkidle0',
      timeout: 90000
    });
    // Extra settle time after the network goes idle.
    await new Promise(resolve => setTimeout(resolve, 3000));

    // Extract the page structure: sections and their resources.
    // The callback runs inside the page, so every helper it needs must be
    // defined within it.
    const structure = await page.evaluate(() => {
      const sections = [];

      const textOf = el => el.innerText?.trim() || '';

      // A section header is a short, single-line element with few children
      // whose text names one of the known section kinds.
      const looksLikeSectionHeader = (el, text) =>
        (text.includes('Rules & Resources') ||
          text.includes('Training Videos') ||
          text === 'Further Resources for Players') &&
        text.length < 100 &&
        !text.includes('\n') &&
        el.children.length < 3;

      // Same acceptance rule for direct anchors and nested anchors.
      const isResourceLink = (anchor, text) =>
        Boolean(text) &&
        typeof anchor.href === 'string' &&
        anchor.href.startsWith('http') &&
        !anchor.href.includes('javascript:') &&
        !anchor.href.includes('#');

      // Get the main content area
      const mainContent =
        document.querySelector('main, .main-content, article, #content') ||
        document.body;

      const allElements = Array.from(mainContent.querySelectorAll('*'));

      for (let i = 0; i < allElements.length; i++) {
        const element = allElements[i];
        const sectionTitle = textOf(element);

        if (!looksLikeSectionHeader(element, sectionTitle)) {
          continue;
        }

        // Keyed by link text: the first occurrence of each text wins.
        const linksByText = new Map();
        const collect = anchor => {
          const linkText = textOf(anchor);
          if (isResourceLink(anchor, linkText) && !linksByText.has(linkText)) {
            linksByText.set(linkText, { text: linkText, href: anchor.href });
          }
        };

        // Look ahead (at most 49 elements) for links belonging to this section.
        const limit = Math.min(allElements.length, i + 50);
        for (let j = i + 1; j < limit; j++) {
          const nextEl = allElements[j];
          const nextText = textOf(nextEl);

          // Stop at the next section header. Elements repeating this
          // section's own title are not treated as a boundary.
          if (
            looksLikeSectionHeader(nextEl, nextText) &&
            nextText !== sectionTitle
          ) {
            break;
          }

          if (nextEl.tagName === 'A') {
            collect(nextEl);
          }

          // Also check child links
          nextEl.querySelectorAll('a[href]').forEach(collect);
        }

        if (linksByText.size > 0) {
          sections.push({
            title: sectionTitle,
            links: [...linksByText.values()]
          });
        }
      }

      return sections;
    });

    console.log(`\n📋 Found ${structure.length} sections\n`);

    // Deduplicate sections by title
    const uniqueSections = [];
    const seenTitles = new Set();

    for (const section of structure) {
      if (seenTitles.has(section.title)) {
        continue;
      }
      seenTitles.add(section.title);

      // Skip the main "Play! Pokémon Rules & Resources" section header
      // as it's just a page title, not a content section
      if (section.title !== 'Play! Pokémon Rules & Resources') {
        uniqueSections.push(section);
      }
    }

    console.log(
      `📋 After deduplication: ${uniqueSections.length} unique sections\n`
    );

    // Get list of existing files
    const existingFiles = fs
      .readdirSync(RESOURCES_DIR)
      .filter(
        f =>
          !f.startsWith('.') &&
          !f.startsWith('debug') &&
          (f.endsWith('.pdf') || f.endsWith('.txt'))
      );

    console.log(`📦 Found ${existingFiles.length} files to organize\n`);

    // Files not yet assigned to a resource. A file leaves the pool as soon as
    // it is matched so that it can never be claimed by a second link.
    const unassignedFiles = [...existingFiles];

    let totalMoved = 0;
    let sectionIndex = 1;

    // Process each section
    for (const section of uniqueSections) {
      const sectionName = sanitizeFilename(section.title);
      const folderName = `${padIndex(sectionIndex)}-${sectionName}`;
      const folderPath = path.join(RESOURCES_DIR, folderName);

      // Create section folder
      if (!fs.existsSync(folderPath)) {
        fs.mkdirSync(folderPath, { recursive: true });
      }

      console.log(`📂 Section ${sectionIndex}: ${section.title}`);
      console.log(` Folder: ${folderName}`);
      console.log(` Resources: ${section.links.length}\n`);

      let resourceIndex = 1;

      // Process each resource in this section
      for (const link of section.links) {
        const resourceName = sanitizeFilename(link.text);
        const matchingFile = findMatchingFile(resourceName, unassignedFiles);

        let moved = false;

        if (matchingFile) {
          unassignedFiles.splice(unassignedFiles.indexOf(matchingFile), 1);

          const oldPath = path.join(RESOURCES_DIR, matchingFile);
          const extension = path.extname(matchingFile);
          const newName = `${padIndex(sectionIndex)}-${padIndex(resourceIndex)}-${resourceName}${extension}`;
          const newPath = path.join(folderPath, newName);

          // Move and rename file
          if (fs.existsSync(oldPath)) {
            fs.renameSync(oldPath, newPath);
            console.log(` ✅ ${resourceIndex}. ${resourceName}${extension}`);
            totalMoved++;
            moved = true;
          }
        }

        if (!moved) {
          console.log(` ⚠️ ${resourceIndex}. ${resourceName} (not found)`);
        }

        resourceIndex++;
      }

      console.log('');
      sectionIndex++;
    }

    // Move any remaining files to an "Other" folder
    const remainingFiles = fs
      .readdirSync(RESOURCES_DIR)
      .filter(
        f =>
          !f.startsWith('.') &&
          !fs.statSync(path.join(RESOURCES_DIR, f)).isDirectory() &&
          (f.endsWith('.pdf') || f.endsWith('.txt'))
      );

    if (remainingFiles.length > 0) {
      const otherFolder = path.join(RESOURCES_DIR, '99-Other');
      if (!fs.existsSync(otherFolder)) {
        fs.mkdirSync(otherFolder, { recursive: true });
      }

      console.log(
        `📂 Moving ${remainingFiles.length} unmatched files to Other folder\n`
      );

      remainingFiles.forEach((file, index) => {
        const oldPath = path.join(RESOURCES_DIR, file);
        const newName = `99-${padIndex(index + 1)}-${file}`;
        const newPath = path.join(otherFolder, newName);

        fs.renameSync(oldPath, newPath);
        console.log(` ✅ ${file}`);
      });
    }

    console.log(`\n🎉 Organization complete!`);
    console.log(`📊 Statistics:`);
    console.log(` Sections created: ${sectionIndex - 1}`);
    console.log(` Files organized: ${totalMoved}`);
    console.log(` Files in Other: ${remainingFiles.length}`);
  } catch (error) {
    console.error('❌ Error:', error.message);
    // Not process.exit(): that would end the process before `finally` runs
    // and leave the browser open.
    process.exitCode = 1;
  } finally {
    // `browser` is still undefined when the failure happened before launch.
    await browser?.close();
  }
}

organizeResources().catch(error => {
  // Reached only when cleanup itself fails (e.g. browser.close() rejects).
  console.error('❌ Error:', error.message);
  process.exitCode = 1;
});