Files
memory-infrastructure-palace/code/utils/organize-pokemon-resources.js

297 lines
9.2 KiB
JavaScript

/**
* Organize Pokémon Resources
*
* Creates folder structure and renames files based on the order
* they appear on the pokemon.com resources page
*
* Usage:
* node code/utils/organize-pokemon-resources.js
* npm run organize:pokemon
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
// Register the stealth plugin on the shared puppeteer-extra instance before
// any browser is launched.
puppeteer.use(StealthPlugin());
// ES modules have no built-in __filename/__dirname, so derive them from
// import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Page whose sections and links define the folder layout and file order.
const BASE_URL =
'https://www.pokemon.com/us/play-pokemon/about/tournaments-rules-and-resources';
// Directory holding the already-downloaded .pdf/.txt files; the numbered
// section folders are created inside it. Resolved relative to this script.
const RESOURCES_DIR = path.resolve(
__dirname,
'../../docs/projects/pokemon-professor/Pokemon Rules & Resources'
);
/**
 * Turn arbitrary link/heading text into a name that is safe to use as a
 * file or folder name.
 *
 * @param {string} name - Raw text to clean up.
 * @returns {string} The text with each of `< > : " / \ | ? *` replaced by
 *   `-`, every run of whitespace collapsed to a single space, and
 *   leading/trailing whitespace removed.
 */
function sanitizeFilename(name) {
  // Characters that are reserved in file names each become a dash.
  const withoutReserved = name.replace(/[<>:"/\\|?*]/g, '-');
  // Tabs, newlines and repeated spaces collapse to one plain space.
  const singleSpaced = withoutReserved.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
/**
 * Find the file in `files` that best corresponds to a resource name.
 *
 * An exact base-name match (file name without its .pdf/.txt extension) wins;
 * otherwise the first file whose base name contains, or is contained in,
 * `resourceName` is used.
 *
 * @param {string[]} files - Candidate file names (with extension).
 * @param {string} resourceName - Sanitized link text to look for.
 * @returns {number} Index into `files`, or -1 when nothing matches.
 */
function findMatchingFileIndex(files, resourceName) {
  const baseNames = files.map(file => file.replace(/\.(pdf|txt)$/, ''));
  const exactIndex = baseNames.indexOf(resourceName);
  if (exactIndex !== -1) {
    return exactIndex;
  }
  return baseNames.findIndex(
    baseName =>
      baseName.includes(resourceName) || resourceName.includes(baseName)
  );
}

/**
 * Main organization function.
 *
 * Loads BASE_URL in a headless browser, extracts the section headers and the
 * links listed under each one, then moves the .pdf/.txt files found directly
 * in RESOURCES_DIR into numbered per-section folders
 * (`NN-<section>/NN-MM-<resource>.<ext>`). Files that match no link are moved
 * to `99-Other`.
 *
 * On failure the error message is logged and `process.exitCode` is set to 1;
 * the browser is always closed before the function settles.
 *
 * @returns {Promise<void>}
 */
async function organizeResources() {
  console.log('🚀 Starting Pokémon Resources Organization');
  console.log(`📁 Resources directory: ${RESOURCES_DIR}\n`);

  let browser;
  try {
    // Fail fast before spending time on the browser and the page load.
    if (!fs.existsSync(RESOURCES_DIR)) {
      throw new Error(`Resources directory not found: ${RESOURCES_DIR}`);
    }

    // Launched inside the try so a launch failure is reported like any other
    // error instead of escaping as an unhandled rejection.
    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled'
      ]
    });

    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    );

    console.log('🌐 Loading main page to extract structure...');
    await page.goto(BASE_URL, { waitUntil: 'networkidle0', timeout: 90000 });
    // Extra settle time after network idle before reading the DOM.
    await new Promise(resolve => setTimeout(resolve, 3000));

    // Extract the page structure: sections and their resources.
    // NOTE: this callback runs in the page, so every helper it uses must be
    // defined inside it.
    const structure = await page.evaluate(() => {
      // How many elements after a header are scanned for its links.
      const LOOKAHEAD_LIMIT = 50;

      const readText = element => element.innerText?.trim() || '';

      // Heuristic: short, single-line, nearly childless element whose text
      // names one of the known section kinds.
      const looksLikeSectionHeader = (element, text) =>
        (text.includes('Rules & Resources') ||
          text.includes('Training Videos') ||
          text === 'Further Resources for Players') &&
        text.length < 100 &&
        !text.includes('\n') &&
        element.children.length < 3;

      // Record an anchor if it is a real http(s) link with visible text.
      const collectLink = (anchor, links) => {
        const href = anchor.href;
        // SVG <a> elements expose a non-string href; skip them.
        if (typeof href !== 'string' || !href) return;
        const linkText = readText(anchor);
        if (
          linkText &&
          href.startsWith('http') &&
          !href.includes('javascript:') &&
          !href.includes('#')
        ) {
          links.push({ text: linkText, href });
        }
      };

      const mainContent =
        document.querySelector('main, .main-content, article, #content') ||
        document.body;
      const allElements = Array.from(mainContent.querySelectorAll('*'));
      const sections = [];

      for (let i = 0; i < allElements.length; i++) {
        const element = allElements[i];
        const sectionTitle = readText(element);
        if (!looksLikeSectionHeader(element, sectionTitle)) continue;

        const links = [];
        const end = Math.min(allElements.length, i + LOOKAHEAD_LIMIT);
        for (let j = i + 1; j < end; j++) {
          const nextEl = allElements[j];
          const nextText = readText(nextEl);

          // Stop at the next, different section header.
          if (
            looksLikeSectionHeader(nextEl, nextText) &&
            nextText !== sectionTitle
          ) {
            break;
          }

          if (nextEl.tagName === 'A') {
            collectLink(nextEl, links);
          }
          nextEl
            .querySelectorAll('a[href]')
            .forEach(anchor => collectLink(anchor, links));
        }

        if (links.length === 0) continue;

        // Containers and their anchors are both visited, so the same link
        // is usually collected more than once; keep the first per text.
        const seenTexts = new Set();
        const uniqueLinks = links.filter(link => {
          if (seenTexts.has(link.text)) return false;
          seenTexts.add(link.text);
          return true;
        });
        sections.push({ title: sectionTitle, links: uniqueLinks });
      }

      return sections;
    });

    console.log(`\n📋 Found ${structure.length} sections\n`);

    // Deduplicate sections by title and drop the page title, which matches
    // the header heuristic but is not a content section.
    const seenTitles = new Set();
    const uniqueSections = structure.filter(section => {
      if (seenTitles.has(section.title)) return false;
      seenTitles.add(section.title);
      return section.title !== 'Play! Pokémon Rules & Resources';
    });
    console.log(
      `📋 After deduplication: ${uniqueSections.length} unique sections\n`
    );

    // Files still waiting to be claimed by a link. Entries are removed as
    // they are moved so one file is never matched by two links.
    const unclaimedFiles = fs
      .readdirSync(RESOURCES_DIR)
      .filter(
        f =>
          !f.startsWith('.') &&
          !f.startsWith('debug') &&
          (f.endsWith('.pdf') || f.endsWith('.txt'))
      );
    console.log(`📦 Found ${unclaimedFiles.length} files to organize\n`);

    let totalMoved = 0;

    for (const [offset, section] of uniqueSections.entries()) {
      const sectionIndex = offset + 1;
      const sectionPrefix = sectionIndex.toString().padStart(2, '0');
      const folderName = `${sectionPrefix}-${sanitizeFilename(section.title)}`;
      const folderPath = path.join(RESOURCES_DIR, folderName);
      fs.mkdirSync(folderPath, { recursive: true });

      console.log(`📂 Section ${sectionIndex}: ${section.title}`);
      console.log(` Folder: ${folderName}`);
      console.log(` Resources: ${section.links.length}\n`);

      // resourceIndex follows the link's position on the page, so numbering
      // keeps gaps for links that have no matching file.
      for (const [linkOffset, link] of section.links.entries()) {
        const resourceIndex = linkOffset + 1;
        const resourceName = sanitizeFilename(link.text);
        const matchIndex = findMatchingFileIndex(unclaimedFiles, resourceName);
        const matchingFile =
          matchIndex === -1 ? undefined : unclaimedFiles[matchIndex];
        const oldPath =
          matchingFile && path.join(RESOURCES_DIR, matchingFile);

        if (matchingFile && fs.existsSync(oldPath)) {
          unclaimedFiles.splice(matchIndex, 1);
          const extension = path.extname(matchingFile);
          const resourcePrefix = resourceIndex.toString().padStart(2, '0');
          const newName = `${sectionPrefix}-${resourcePrefix}-${resourceName}${extension}`;
          fs.renameSync(oldPath, path.join(folderPath, newName));
          console.log(`${resourceIndex}. ${resourceName}${extension}`);
          totalMoved++;
        } else {
          console.log(` ⚠️ ${resourceIndex}. ${resourceName} (not found)`);
        }
      }
      console.log('');
    }

    // Move any remaining files to an "Other" folder.
    // NOTE(review): unlike the list above, this does not exclude
    // "debug"-prefixed files, so they are moved too — confirm intended.
    const remainingFiles = fs
      .readdirSync(RESOURCES_DIR)
      .filter(
        f =>
          !f.startsWith('.') &&
          !fs.statSync(path.join(RESOURCES_DIR, f)).isDirectory() &&
          (f.endsWith('.pdf') || f.endsWith('.txt'))
      );

    if (remainingFiles.length > 0) {
      const otherFolder = path.join(RESOURCES_DIR, '99-Other');
      fs.mkdirSync(otherFolder, { recursive: true });
      console.log(
        `📂 Moving ${remainingFiles.length} unmatched files to Other folder\n`
      );
      remainingFiles.forEach((file, index) => {
        const newName = `99-${(index + 1).toString().padStart(2, '0')}-${file}`;
        fs.renameSync(
          path.join(RESOURCES_DIR, file),
          path.join(otherFolder, newName)
        );
        console.log(`${file}`);
      });
    }

    console.log(`\n🎉 Organization complete!`);
    console.log(`📊 Statistics:`);
    console.log(` Sections created: ${uniqueSections.length}`);
    console.log(` Files organized: ${totalMoved}`);
    console.log(` Files in Other: ${remainingFiles.length}`);
  } catch (error) {
    console.error('❌ Error:', error.message);
    // Use exitCode rather than process.exit(): exit() would terminate the
    // process immediately and skip the finally block, leaving the browser
    // process running.
    process.exitCode = 1;
  } finally {
    // browser is undefined when the failure happened before/during launch.
    await browser?.close();
  }
}
// Script entry point. The returned promise is handled explicitly so that a
// rejection (e.g. the browser failing to launch or to close) is reported in
// the same format as other errors and yields a non-zero exit code.
organizeResources().catch(error => {
  console.error('❌ Error:', error?.message ?? error);
  process.exitCode = 1;
});