/**
 * Organize Pokémon Resources
 *
 * Creates a folder structure and renames files based on the order
 * in which they appear on the pokemon.com resources page.
 *
 * Usage:
 *   node code/utils/organize-pokemon-resources.js
 *   npm run organize:pokemon
 */

import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

// Register the stealth plugin on the shared puppeteer instance before any
// browser is launched.
puppeteer.use(StealthPlugin());

// ES modules have no built-in __filename/__dirname; derive them from the
// module URL so paths below can be resolved relative to this file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Page whose section/link order drives the folder layout.
const BASE_URL =
  'https://www.pokemon.com/us/play-pokemon/about/tournaments-rules-and-resources';

// Directory holding the downloaded resource files, resolved relative to this
// script's location.
const RESOURCES_DIR = path.resolve(
  __dirname,
  '../../docs/projects/pokemon-professor/Pokemon Rules & Resources'
);

/**
 * Clean a display name so it can be used as a file or folder name.
 *
 * Each of the characters `< > : " / \ | ? *` is replaced with `-`, every run
 * of whitespace (including newlines and tabs) is collapsed to a single space,
 * and leading/trailing whitespace is removed.
 *
 * @param {string} name - Raw name, e.g. a section title or link text.
 * @returns {string} The sanitized name (may be empty if `name` was blank).
 */
function sanitizeFilename(name) {
  return name
    .replace(/[<>:"/\\|?*]/g, '-')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Zero-pad a counter to two digits for folder/file name prefixes.
 *
 * @param {number} value - Counter value.
 * @returns {string} The value as a string, left-padded with '0' to width 2.
 */
function padIndex(value) {
  return String(value).padStart(2, '0');
}

/**
 * List the .pdf/.txt files directly inside `dir` that are eligible for
 * organizing: dotfiles, names starting with "debug", and directories are
 * skipped.
 *
 * @param {string} dir - Directory to scan (not recursive).
 * @returns {string[]} Matching file names (without the directory part).
 */
function listResourceFiles(dir) {
  return fs
    .readdirSync(dir)
    .filter(
      (name) =>
        !name.startsWith('.') &&
        !name.startsWith('debug') &&
        (name.endsWith('.pdf') || name.endsWith('.txt')) &&
        !fs.statSync(path.join(dir, name)).isDirectory()
    );
}

/**
 * Drop repeated section titles (first occurrence wins) and the page-title
 * pseudo-section.
 *
 * @param {{title: string, links: {text: string, href: string}[]}[]} sections
 * @returns {{title: string, links: {text: string, href: string}[]}[]}
 */
function dedupeSections(sections) {
  const seenTitles = new Set();
  const unique = [];

  for (const section of sections) {
    if (seenTitles.has(section.title)) continue;
    seenTitles.add(section.title);

    // Skip the main "Play! Pokémon Rules & Resources" header: it is just the
    // page title, not a content section.
    if (section.title !== 'Play! Pokémon Rules & Resources') {
      unique.push(section);
    }
  }

  return unique;
}

/**
 * Pick the file that corresponds to a sanitized resource name.
 *
 * An exact base-name match is preferred; only when there is none does the
 * looser "one name contains the other" comparison apply, so that e.g.
 * "Rules.pdf" wins over "Rules Appendix.pdf" for the resource "Rules".
 *
 * @param {string[]} candidates - File names still waiting to be moved.
 * @param {string} resourceName - Sanitized link text.
 * @returns {string|undefined} The matching file name, if any.
 */
function findMatchingFile(candidates, resourceName) {
  const baseNameOf = (file) => file.replace(/\.(pdf|txt)$/, '');

  const exactMatch = candidates.find(
    (file) => baseNameOf(file) === resourceName
  );
  if (exactMatch) return exactMatch;

  return candidates.find((file) => {
    const baseName = baseNameOf(file);
    return baseName.includes(resourceName) || resourceName.includes(baseName);
  });
}

/**
 * Main organization function.
 *
 * Loads BASE_URL in a headless browser, extracts section headers and the
 * links that follow them, then moves the .pdf/.txt files found directly in
 * RESOURCES_DIR into numbered per-section folders
 * (`NN-<section>/NN-MM-<resource>.<ext>`). Files that match no link are moved
 * to `99-Other`.
 *
 * Errors raised after the browser has launched are logged and reported via
 * `process.exitCode = 1` (rather than `process.exit`) so that the `finally`
 * block still gets to close the browser.
 *
 * @returns {Promise<void>} Rejects only if launching or closing the browser
 *   fails, since those calls sit outside the try/catch.
 */
async function organizeResources() {
  console.log('🚀 Starting Pokémon Resources Organization');
  console.log(`📁 Resources directory: ${RESOURCES_DIR}\n`);

  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled'
    ]
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    );

    console.log('🌐 Loading main page to extract structure...');
    await page.goto(BASE_URL, { waitUntil: 'networkidle0', timeout: 90000 });
    // Fixed extra delay after network idle — presumably to let late
    // client-side rendering settle before the DOM is inspected.
    await new Promise(resolve => setTimeout(resolve, 3000));

    // Extract the page structure: sections and their resources.
    // NOTE: this callback is serialized and run inside the page, so it must
    // not reference anything from the Node.js scope.
    const structure = await page.evaluate(() => {
      // Heuristic for a section header: a short, single-line element with few
      // children whose text names one of the known section kinds.
      const looksLikeSectionHeader = (el, text) =>
        (text.includes('Rules & Resources') ||
          text.includes('Training Videos') ||
          text === 'Further Resources for Players') &&
        text.length < 100 &&
        !text.includes('\n') &&
        el.children.length < 3;

      const isUsableHref = (href) =>
        !href.includes('javascript:') && !href.includes('#');

      const sections = [];

      // Get the main content area
      const mainContent =
        document.querySelector('main, .main-content, article, #content') ||
        document.body;

      // All descendants in document order; headers and their links are found
      // by position in this flat list.
      const allElements = Array.from(mainContent.querySelectorAll('*'));

      for (let i = 0; i < allElements.length; i++) {
        const element = allElements[i];
        const sectionTitle = element.innerText?.trim() || '';

        if (!looksLikeSectionHeader(element, sectionTitle)) continue;

        const links = [];

        // Look ahead (at most 49 elements) for links belonging to this
        // section, stopping at the next header with a different title.
        for (let j = i + 1; j < allElements.length && j < i + 50; j++) {
          const nextEl = allElements[j];
          const nextText = nextEl.innerText?.trim() || '';

          if (
            looksLikeSectionHeader(nextEl, nextText) &&
            nextText !== sectionTitle
          ) {
            break;
          }

          // The element itself may be a link...
          if (nextEl.tagName === 'A' && nextEl.href) {
            const linkText = nextEl.innerText.trim();
            if (linkText && isUsableHref(nextEl.href)) {
              links.push({ text: linkText, href: nextEl.href });
            }
          }

          // ...and it may also contain links.
          nextEl.querySelectorAll('a[href]').forEach(a => {
            const linkText = a.innerText.trim();
            if (linkText && isUsableHref(a.href) && a.href.startsWith('http')) {
              links.push({ text: linkText, href: a.href });
            }
          });
        }

        if (links.length === 0) continue;

        // Deduplicate links by visible text, keeping the first occurrence
        // (the same anchor is reached both directly and via its ancestors).
        const seen = new Set();
        const uniqueLinks = links.filter(link => {
          if (seen.has(link.text)) return false;
          seen.add(link.text);
          return true;
        });

        sections.push({ title: sectionTitle, links: uniqueLinks });
      }

      return sections;
    });

    console.log(`\n📋 Found ${structure.length} sections\n`);

    // Deduplicate sections by title
    const uniqueSections = dedupeSections(structure);

    console.log(
      `📋 After deduplication: ${uniqueSections.length} unique sections\n`
    );

    // Files still waiting to be placed; entries are removed as they are
    // matched so one file can never be claimed by two links.
    const pendingFiles = listResourceFiles(RESOURCES_DIR);

    console.log(`📦 Found ${pendingFiles.length} files to organize\n`);

    let totalMoved = 0;
    let sectionIndex = 1;

    // Process each section
    for (const section of uniqueSections) {
      const sectionPrefix = padIndex(sectionIndex);
      const folderName = `${sectionPrefix}-${sanitizeFilename(section.title)}`;
      const folderPath = path.join(RESOURCES_DIR, folderName);

      // Create section folder (no-op if it already exists)
      fs.mkdirSync(folderPath, { recursive: true });

      console.log(`📂 Section ${sectionIndex}: ${section.title}`);
      console.log(` Folder: ${folderName}`);
      console.log(` Resources: ${section.links.length}\n`);

      let resourceIndex = 1;

      // Process each resource in this section
      for (const link of section.links) {
        const resourceName = sanitizeFilename(link.text);
        const matchingFile = findMatchingFile(pendingFiles, resourceName);
        let moved = false;

        if (matchingFile) {
          // Claimed either way: moved now, or gone from disk and unusable.
          pendingFiles.splice(pendingFiles.indexOf(matchingFile), 1);

          const oldPath = path.join(RESOURCES_DIR, matchingFile);
          if (fs.existsSync(oldPath)) {
            const extension = path.extname(matchingFile);
            const newName = `${sectionPrefix}-${padIndex(resourceIndex)}-${resourceName}${extension}`;

            // Move and rename file
            fs.renameSync(oldPath, path.join(folderPath, newName));
            console.log(` ✅ ${resourceIndex}. ${resourceName}${extension}`);
            totalMoved++;
            moved = true;
          }
        }

        if (!moved) {
          console.log(` ⚠️ ${resourceIndex}. ${resourceName} (not found)`);
        }

        resourceIndex++;
      }

      console.log('');
      sectionIndex++;
    }

    // Move any remaining files to an "Other" folder. The directory is re-read
    // with the same filter as above, so debug files are left where they are.
    const remainingFiles = listResourceFiles(RESOURCES_DIR);

    if (remainingFiles.length > 0) {
      const otherFolder = path.join(RESOURCES_DIR, '99-Other');
      fs.mkdirSync(otherFolder, { recursive: true });

      console.log(
        `📂 Moving ${remainingFiles.length} unmatched files to Other folder\n`
      );

      remainingFiles.forEach((file, index) => {
        const newName = `99-${padIndex(index + 1)}-${file}`;
        fs.renameSync(
          path.join(RESOURCES_DIR, file),
          path.join(otherFolder, newName)
        );
        console.log(` ✅ ${file}`);
      });
    }

    console.log(`\n🎉 Organization complete!`);
    console.log(`📊 Statistics:`);
    console.log(` Sections created: ${sectionIndex - 1}`);
    console.log(` Files organized: ${totalMoved}`);
    console.log(` Files in Other: ${remainingFiles.length}`);
  } catch (error) {
    console.error('❌ Error:', error.message);
    // Set the exit code instead of calling process.exit(): exiting here would
    // terminate the process before the finally block closes the browser.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}

// Run the script. Browser launch/close failures reject the returned promise
// (they happen outside the function's try/catch), so handle them here rather
// than leaving the promise floating.
organizeResources().catch((error) => {
  console.error('❌ Error:', error.message);
  process.exitCode = 1;
});