🗑️ Remove unused and archived files across multiple directories and update project dependencies in package files
This commit is contained in:
396
code/utils/deploy-pokedex.js
Normal file
396
code/utils/deploy-pokedex.js
Normal file
@@ -0,0 +1,396 @@
|
||||
/**
|
||||
* Pokedex.Online Deployment Script
|
||||
*
|
||||
* Deploys the pokedex.online Docker container to Synology NAS via SSH.
|
||||
* - Connects to Synology using configured SSH hosts
|
||||
* - Transfers files via SFTP
|
||||
* - Manages Docker deployment with rollback on failure
|
||||
* - Performs health check to verify deployment
|
||||
*
|
||||
* Usage:
|
||||
* node code/utils/deploy-pokedex.js [--target internal|external] [--port 8080] [--ssl-port 8443]
|
||||
* npm run deploy:pokedex -- --target external --port 8081 --ssl-port 8444
|
||||
*
|
||||
* Examples:
|
||||
* npm run deploy:pokedex # Deploy to internal (10.0.0.81) on port 8080
|
||||
* npm run deploy:pokedex -- --target external # Deploy to external (home.gregrjacobs.com)
|
||||
* npm run deploy:pokedex -- --port 8081 # Deploy to internal on port 8081
|
||||
* npm run deploy:pokedex -- --port 8080 --ssl-port 8443 # Deploy with HTTPS on port 8443
|
||||
* npm run deploy:pokedex -- --target external --port 3000 --ssl-port 3443
|
||||
*/
|
||||
import { NodeSSH } from 'node-ssh';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import http from 'http';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Configuration
//
// SECURITY: the SSH password was previously hard-coded here in plaintext.
// Credentials must never be committed to source control — the password is now
// read from the SYNOLOGY_SSH_PASSWORD environment variable. Key-based auth
// (privateKeyPath) remains the primary mechanism; the password is only a
// fallback for keyboard-interactive auth.
const SSH_PASSWORD = process.env.SYNOLOGY_SSH_PASSWORD;

const SSH_HOSTS = {
  // LAN-facing deployment target (NAS on the local network)
  internal: {
    host: '10.0.0.81',
    port: 2323,
    username: 'GregRJacobs',
    privateKeyPath: '~/.ssh/ds3627xs_gregrjacobs',
    password: SSH_PASSWORD
  },
  // WAN-facing deployment target (same NAS via public hostname)
  external: {
    host: 'home.gregrjacobs.com',
    port: 2323,
    username: 'GregRJacobs',
    privateKeyPath: '~/.ssh/ds3627xs_gregrjacobs',
    password: SSH_PASSWORD
  }
};

// Deployment directory on the Synology NAS
const REMOTE_PATH = '/volume1/docker/pokedex-online/base';
// docker-compose service/container name managed by this script
const CONTAINER_NAME = 'pokedex-online';
// Local directory containing the site files to deploy
const SOURCE_DIR = path.resolve(__dirname, '../websites/pokedex.online/apps');
|
||||
|
||||
/**
 * Parse command line arguments.
 *
 * Supported flags:
 *   --target internal|external   Deployment target (default: 'internal')
 *   --port <n>                   HTTP port to expose (default: 8080)
 *   --ssl-port <n>               HTTPS port to expose (default: none)
 *
 * @returns {{ target: string, port: number, sslPort: number|null }} Parsed config
 * @throws {Error} If the target is unknown or a port is not in [1, 65535]
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const config = {
    target: 'internal',
    port: 8080,
    sslPort: null
  };

  for (let i = 0; i < args.length; i++) {
    if (args[i] === '--target' && args[i + 1]) {
      config.target = args[i + 1];
      i++; // skip the consumed value
    } else if (args[i] === '--port' && args[i + 1]) {
      config.port = parseInt(args[i + 1], 10);
      i++;
    } else if (args[i] === '--ssl-port' && args[i + 1]) {
      config.sslPort = parseInt(args[i + 1], 10);
      i++;
    }
  }

  // Validate target against the configured SSH hosts
  if (!SSH_HOSTS[config.target]) {
    throw new Error(
      `Invalid target: ${config.target}. Must be 'internal' or 'external'.`
    );
  }

  // Validate ports. _isValidPort uses Number.isInteger, which rejects NaN
  // (e.g. from a non-numeric argument) without the implicit coercion of the
  // global isNaN, and also rejects fractional values.
  if (!_isValidPort(config.port)) {
    throw new Error(
      `Invalid port: ${config.port}. Must be between 1 and 65535.`
    );
  }

  if (config.sslPort !== null && !_isValidPort(config.sslPort)) {
    throw new Error(
      `Invalid SSL port: ${config.sslPort}. Must be between 1 and 65535.`
    );
  }

  return config;
}

/**
 * Check that a value is a usable TCP port number.
 * @param {number} port - Candidate port value
 * @returns {boolean} True if port is an integer in [1, 65535]
 */
function _isValidPort(port) {
  return Number.isInteger(port) && port >= 1 && port <= 65535;
}
|
||||
|
||||
/**
 * Expand a leading tilde in a file path to the user's home directory.
 *
 * @param {string} filepath - Path potentially starting with ~ (e.g. '~/.ssh/key')
 * @returns {string} Expanded path, or the input unchanged if it has no tilde
 */
function expandTilde(filepath) {
  if (filepath === '~' || filepath.startsWith('~/')) {
    // HOME is the POSIX convention; USERPROFILE covers Windows shells where
    // HOME may be unset. Fall back to '' rather than passing undefined to
    // path.join, which would throw.
    const home = process.env.HOME ?? process.env.USERPROFILE ?? '';
    return path.join(home, filepath.slice(2));
  }
  return filepath;
}
|
||||
|
||||
/**
 * Build a docker-compose.yml with the requested host port mappings.
 *
 * Reads the compose file from SOURCE_DIR and rewrites its port mappings:
 * container port 80 is exposed on `port`, and container port 443 is either
 * exposed on `sslPort` or stripped entirely when no SSL port is given.
 *
 * @param {number} port - HTTP port to map to container port 80
 * @param {number|null} sslPort - HTTPS port to map to container port 443 (optional)
 * @returns {string} Modified docker-compose content
 */
function createModifiedDockerCompose(port, sslPort) {
  const composePath = path.join(SOURCE_DIR, 'docker-compose.yml');
  let compose = fs.readFileSync(composePath, 'utf8');

  // Rewrite the HTTP mapping; the pattern tolerates single or double quotes.
  compose = compose.replace(/- ['"](\d+):80['"]/, `- '${port}:80'`);

  if (sslPort === null) {
    // No SSL requested: drop any HTTPS mapping lines entirely.
    compose = compose.replace(/\s*- ['"](\d+):443['"]/g, '');
  } else {
    compose = compose.replace(/- ['"](\d+):443['"]/, `- '${sslPort}:443'`);
  }

  return compose;
}
|
||||
|
||||
/**
 * Perform an HTTP health check with retries.
 *
 * Issues a GET to http://host:port and treats a 200 response as healthy.
 * Retries up to `retries` times, pausing 3 seconds between attempts; each
 * individual request times out after 5 seconds.
 *
 * @param {string} host - Host to check
 * @param {number} port - Port to check
 * @param {number} retries - Number of attempts (default 5)
 * @returns {Promise<boolean>} True if healthy, false if all attempts fail
 */
async function healthCheck(host, port, retries = 5) {
  for (let i = 0; i < retries; i++) {
    try {
      await new Promise((resolve, reject) => {
        const req = http.get(
          `http://${host}:${port}`,
          { timeout: 5000 },
          res => {
            // Drain the response body so the underlying socket is released;
            // without this, non-consumed responses keep the connection open.
            res.resume();
            if (res.statusCode === 200) {
              resolve();
            } else {
              reject(new Error(`HTTP ${res.statusCode}`));
            }
          }
        );
        req.on('error', reject);
        req.on('timeout', () => {
          req.destroy();
          reject(new Error('Request timeout'));
        });
      });
      return true;
    } catch {
      // Failed attempt: wait and retry unless this was the last try.
      if (i < retries - 1) {
        console.log(
          `⏳ Health check attempt ${i + 1}/${retries} failed, retrying in 3s...`
        );
        await new Promise(resolve => setTimeout(resolve, 3000));
      }
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Main deployment function.
 *
 * Orchestrates the full deployment: parses CLI args, connects to the Synology
 * NAS over SSH, records the currently running image (for rollback), transfers
 * the site files and a port-adjusted docker-compose.yml, rebuilds and restarts
 * the container, then verifies it with an HTTP health check. On any failure it
 * rolls back to the previously captured image (or cleans up if the container
 * is new) and exits with code 1.
 */
async function deploy() {
  const ssh = new NodeSSH();
  let previousImage = null;     // image hash of the pre-deploy container, for rollback
  let containerExisted = false; // whether a container was already running

  try {
    // Parse arguments
    const config = parseArgs();
    const sshConfig = SSH_HOSTS[config.target];

    console.log('🚀 Starting Pokedex.Online deployment');
    console.log(
      `📡 Target: ${config.target} (${sshConfig.host}:${sshConfig.port})`
    );
    console.log(`🔌 HTTP Port: ${config.port}`);
    if (config.sslPort) {
      console.log(`🔒 HTTPS Port: ${config.sslPort}`);
    }

    // Connect to Synology (key auth first; tryKeyboard enables the
    // keyboard-interactive fallback some Synology setups require)
    console.log('\n🔐 Connecting to Synology...');
    await ssh.connect({
      host: sshConfig.host,
      port: sshConfig.port,
      username: sshConfig.username,
      privateKeyPath: expandTilde(sshConfig.privateKeyPath),
      password: sshConfig.password,
      tryKeyboard: true
    });
    console.log('✅ Connected successfully');

    // Check if container exists and capture current image.
    // Two docker paths are tried because the binary location differs across
    // Synology DSM versions.
    console.log('\n📦 Checking for existing container...');
    console.log(` Container name: ${CONTAINER_NAME}`);
    try {
      const result = await ssh.execCommand(
        `/usr/local/bin/docker inspect --format='{{.Image}}' ${CONTAINER_NAME} || /usr/bin/docker inspect --format='{{.Image}}' ${CONTAINER_NAME}`
      );
      console.log(` Command exit code: ${result.code}`);
      if (result.stdout) console.log(` Stdout: ${result.stdout.trim()}`);
      if (result.stderr) console.log(` Stderr: ${result.stderr.trim()}`);

      if (result.code === 0 && result.stdout.trim()) {
        previousImage = result.stdout.trim();
        containerExisted = true;
        console.log(
          `✅ Found existing container (image: ${previousImage.substring(0, 12)}...)`
        );
      } else {
        console.log('ℹ️ No existing container found');
      }
    } catch (error) {
      // Inspect failure is treated the same as "no container": deploy fresh.
      console.log(` Error: ${error.message}`);
      console.log('ℹ️ No existing container found');
    }

    // Create remote directory
    console.log('\n📁 Creating remote directory...');
    const mkdirResult = await ssh.execCommand(`mkdir -p ${REMOTE_PATH}`);
    console.log(` Command: mkdir -p ${REMOTE_PATH}`);
    if (mkdirResult.stdout) console.log(` Output: ${mkdirResult.stdout}`);
    if (mkdirResult.stderr) console.log(` Stderr: ${mkdirResult.stderr}`);
    console.log(` ✅ Directory ready`);

    // Create modified docker-compose.yml with the requested port mappings,
    // written to a temp file so the source compose file stays untouched.
    const modifiedDockerCompose = createModifiedDockerCompose(
      config.port,
      config.sslPort
    );
    const tempDockerComposePath = path.join(
      SOURCE_DIR,
      'docker-compose.tmp.yml'
    );
    fs.writeFileSync(tempDockerComposePath, modifiedDockerCompose);

    // Transfer files (the temp compose file lands as docker-compose.yml remotely)
    console.log('\n📤 Transferring files...');
    const filesToTransfer = [
      {
        local: path.join(SOURCE_DIR, 'index.html'),
        remote: `${REMOTE_PATH}/index.html`
      },
      {
        local: path.join(SOURCE_DIR, 'Dockerfile'),
        remote: `${REMOTE_PATH}/Dockerfile`
      },
      {
        local: tempDockerComposePath,
        remote: `${REMOTE_PATH}/docker-compose.yml`
      }
    ];

    for (const file of filesToTransfer) {
      try {
        await ssh.putFile(file.local, file.remote);
        console.log(` ✅ ${path.basename(file.local)}`);
      } catch (error) {
        // If SFTP fails, fall back to cat method (write the file via a
        // shell heredoc over the SSH exec channel).
        console.log(
          ` ⚠️ SFTP failed for ${path.basename(file.local)}, using cat fallback...`
        );
        const fileContent = fs.readFileSync(file.local, 'utf8');
        // NOTE(review): escapedContent is computed but never used — the
        // heredoc below embeds the raw fileContent. Presumably this is a
        // leftover from an abandoned single-quote escaping approach;
        // confirm and remove.
        const escapedContent = fileContent.replace(/'/g, "'\\''");
        // NOTE(review): a file containing a line equal to 'EOFMARKER' would
        // terminate the heredoc early and truncate the transfer — confirm
        // the transferred files can never contain that token.
        const catResult = await ssh.execCommand(
          `cat > '${file.remote}' << 'EOFMARKER'\n${fileContent}\nEOFMARKER`
        );
        if (catResult.stdout) console.log(` Output: ${catResult.stdout}`);
        if (catResult.stderr) console.log(` Stderr: ${catResult.stderr}`);
        if (catResult.code !== 0) {
          throw new Error(
            `Failed to transfer ${path.basename(file.local)}: ${catResult.stderr}`
          );
        }
        console.log(
          ` ✅ ${path.basename(file.local)} (${fs.statSync(file.local).size} bytes)`
        );
      }
    }

    // Clean up temp file
    fs.unlinkSync(tempDockerComposePath);

    // Stop existing container first to avoid port conflicts
    if (containerExisted) {
      console.log('\n🛑 Stopping existing container...');
      const stopResult = await ssh.execCommand(
        `cd ${REMOTE_PATH} && /usr/local/bin/docker compose down || /usr/local/bin/docker-compose down`
      );
      if (stopResult.stdout) console.log(` ${stopResult.stdout.trim()}`);
      console.log(' ✅ Container stopped');
    }

    // Deploy with docker-compose
    console.log('\n🐳 Building and starting Docker container...');
    console.log(` Working directory: ${REMOTE_PATH}`);

    // Try Docker Compose V2 first (docker compose), then fall back to V1 (docker-compose)
    // Use full paths for Synology
    console.log(' Attempting: /usr/local/bin/docker compose up -d --build');
    let deployResult = await ssh.execCommand(
      `cd ${REMOTE_PATH} && /usr/local/bin/docker compose up -d --build || /usr/local/bin/docker-compose up -d --build || /usr/bin/docker compose up -d --build`,
      { stream: 'both' }
    );

    // Echo docker's own output, indented and with blank lines stripped
    console.log('\n 📋 Docker Output:');
    if (deployResult.stdout) {
      deployResult.stdout.split('\n').forEach(line => {
        if (line.trim()) console.log(` ${line}`);
      });
    }
    if (deployResult.stderr) {
      console.log('\n ⚠️ Docker Stderr:');
      deployResult.stderr.split('\n').forEach(line => {
        if (line.trim()) console.log(` ${line}`);
      });
    }
    console.log(` Exit code: ${deployResult.code}`);

    if (deployResult.code !== 0) {
      throw new Error(`Docker deployment failed: ${deployResult.stderr}`);
    }

    console.log('\n✅ Container started');

    // Health check: poll the freshly started container over HTTP
    console.log('\n🏥 Performing health check...');
    const isHealthy = await healthCheck(sshConfig.host, config.port);

    if (!isHealthy) {
      throw new Error('Health check failed - container is not responding');
    }

    console.log('✅ Health check passed');
    console.log(`\n🎉 Deployment successful!`);
    console.log(`🌐 HTTP: http://${sshConfig.host}:${config.port}`);
    if (config.sslPort) {
      console.log(`🔒 HTTPS: https://${sshConfig.host}:${config.sslPort}`);
    }

    ssh.dispose();
  } catch (error) {
    console.error('\n❌ Deployment failed:', error.message);

    // Rollback: if a previous image was captured, retag it as latest and
    // restart from it; a brand-new (failed) deployment is torn down instead.
    if (previousImage) {
      console.log('\n🔄 Rolling back to previous image...');
      try {
        await ssh.execCommand(
          `cd ${REMOTE_PATH} && docker-compose down && docker tag ${previousImage} pokedex-online:latest && docker-compose up -d`
        );
        console.log('✅ Rollback successful');
      } catch (rollbackError) {
        console.error('❌ Rollback failed:', rollbackError.message);
      }
    } else if (containerExisted === false) {
      console.log('\n🧹 Cleaning up failed deployment...');
      try {
        await ssh.execCommand(
          `cd ${REMOTE_PATH} && docker-compose down --volumes --remove-orphans`
        );
        console.log('✅ Cleanup successful');
      } catch (cleanupError) {
        console.error('❌ Cleanup failed:', cleanupError.message);
      }
    }

    ssh.dispose();
    process.exit(1);
  }
}

// Run deployment. Errors are handled inside deploy() itself, which logs,
// attempts rollback/cleanup, and exits the process with code 1 on failure.
deploy();
|
||||
296
code/utils/organize-pokemon-resources.js
Normal file
296
code/utils/organize-pokemon-resources.js
Normal file
@@ -0,0 +1,296 @@
|
||||
/**
|
||||
* Organize Pokémon Resources
|
||||
*
|
||||
* Creates folder structure and renames files based on the order
|
||||
* they appear on the pokemon.com resources page
|
||||
*
|
||||
* Usage:
|
||||
* node code/utils/organize-pokemon-resources.js
|
||||
* npm run organize:pokemon
|
||||
*/
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
// Enable the stealth plugin so headless Chrome is less likely to be
// flagged as a bot by the target site.
puppeteer.use(StealthPlugin());

// ESM equivalents of CommonJS __filename/__dirname.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Page whose section structure drives the folder layout.
const BASE_URL =
  'https://www.pokemon.com/us/play-pokemon/about/tournaments-rules-and-resources';
// Local directory holding the previously downloaded files to organize.
const RESOURCES_DIR = path.resolve(
  __dirname,
  '../../docs/projects/pokemon-professor/Pokemon Rules & Resources'
);
|
||||
|
||||
/**
 * Clean a name so it is safe to use as a filename: reserved filesystem
 * characters become '-', runs of whitespace collapse to a single space,
 * and surrounding whitespace is stripped.
 */
function sanitizeFilename(name) {
  const withoutReserved = name.replace(/[<>:"/\\|?*]/g, '-');
  const singleSpaced = withoutReserved.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
|
||||
|
||||
/**
 * Main organization function.
 *
 * Scrapes the pokemon.com resources page to recover the on-page section
 * order, then reorganizes the flat RESOURCES_DIR into numbered section
 * folders (01-..., 02-...) with files renamed to NN-MM-<name>.<ext> matching
 * their position on the page. Files that match no scraped resource are moved
 * into a 99-Other folder. Exits with code 1 on any error.
 */
async function organizeResources() {
  console.log('🚀 Starting Pokémon Resources Organization');
  console.log(`📁 Resources directory: ${RESOURCES_DIR}\n`);

  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled'
    ]
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    // Realistic UA string to reduce the chance of bot blocking
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    );

    console.log('🌐 Loading main page to extract structure...');
    await page.goto(BASE_URL, { waitUntil: 'networkidle0', timeout: 90000 });
    // Extra settle time for late-rendering client-side content
    await new Promise(resolve => setTimeout(resolve, 3000));

    // Extract the page structure: sections and their resources.
    // Runs in the browser context, so only DOM APIs are available here.
    const structure = await page.evaluate(() => {
      const sections = [];

      // Get the main content area
      const mainContent =
        document.querySelector('main, .main-content, article, #content') ||
        document.body;

      // Look for all text nodes and elements that might be section headers
      // The page likely uses specific patterns for section titles
      const allElements = Array.from(mainContent.querySelectorAll('*'));

      for (let i = 0; i < allElements.length; i++) {
        const element = allElements[i];
        const text = element.innerText?.trim() || '';

        // Check if this looks like a section header.
        // Heuristic: short single-line text containing one of the known
        // header phrases, on an element with few children.
        // NOTE(review): this is fragile against page redesigns — confirm it
        // still matches if the scrape starts returning zero sections.
        const isSectionHeader =
          (text.includes('Rules & Resources') ||
            text.includes('Training Videos') ||
            text === 'Further Resources for Players') &&
          text.length < 100 &&
          !text.includes('\n') &&
          element.children.length < 3;

        if (!isSectionHeader) continue;

        const sectionTitle = text;
        const links = [];

        // Look ahead to find links belonging to this section.
        // Stop when we hit another section header; the lookahead is capped
        // at 50 elements to bound the scan.
        for (let j = i + 1; j < allElements.length && j < i + 50; j++) {
          const nextEl = allElements[j];
          const nextText = nextEl.innerText?.trim() || '';

          // Stop if we hit another section header
          if (
            (nextText.includes('Rules & Resources') ||
              nextText.includes('Training Videos') ||
              nextText === 'Further Resources for Players') &&
            nextText.length < 100 &&
            !nextText.includes('\n') &&
            nextEl.children.length < 3 &&
            nextText !== sectionTitle
          ) {
            break;
          }

          // Look for links in this element
          if (nextEl.tagName === 'A' && nextEl.href) {
            const linkText = nextEl.innerText.trim();
            if (
              linkText &&
              !nextEl.href.includes('javascript:') &&
              !nextEl.href.includes('#')
            ) {
              links.push({ text: linkText, href: nextEl.href });
            }
          }

          // Also check child links (duplicates are filtered below)
          const childLinks = nextEl.querySelectorAll('a[href]');
          childLinks.forEach(a => {
            const linkText = a.innerText.trim();
            if (
              linkText &&
              !a.href.includes('javascript:') &&
              !a.href.includes('#') &&
              a.href.startsWith('http')
            ) {
              links.push({ text: linkText, href: a.href });
            }
          });
        }

        if (links.length > 0) {
          // Deduplicate links by their visible text
          const uniqueLinks = [];
          const seen = new Set();
          links.forEach(link => {
            if (!seen.has(link.text)) {
              seen.add(link.text);
              uniqueLinks.push(link);
            }
          });

          sections.push({
            title: sectionTitle,
            links: uniqueLinks
          });
        }
      }

      return sections;
    });

    console.log(`\n📋 Found ${structure.length} sections\n`);

    // Deduplicate sections by title (the nested element scan can report the
    // same header more than once)
    const uniqueSections = [];
    const seenTitles = new Set();
    structure.forEach(section => {
      if (!seenTitles.has(section.title)) {
        seenTitles.add(section.title);
        // Skip the main "Play! Pokémon Rules & Resources" section header
        // as it's just a page title, not a content section
        if (section.title !== 'Play! Pokémon Rules & Resources') {
          uniqueSections.push(section);
        }
      }
    });

    console.log(
      `📋 After deduplication: ${uniqueSections.length} unique sections\n`
    );

    // Get list of existing files to organize (previously downloaded PDFs and
    // video-URL text files; debug artifacts and dotfiles are skipped)
    const existingFiles = fs
      .readdirSync(RESOURCES_DIR)
      .filter(
        f =>
          !f.startsWith('.') &&
          !f.startsWith('debug') &&
          (f.endsWith('.pdf') || f.endsWith('.txt'))
      );

    console.log(`📦 Found ${existingFiles.length} files to organize\n`);

    let totalMoved = 0;
    let sectionIndex = 1; // 1-based; becomes the NN folder prefix

    // Process each section
    for (const section of uniqueSections) {
      const sectionName = sanitizeFilename(section.title);
      const folderName = `${sectionIndex.toString().padStart(2, '0')}-${sectionName}`;
      const folderPath = path.join(RESOURCES_DIR, folderName);

      // Create section folder
      if (!fs.existsSync(folderPath)) {
        fs.mkdirSync(folderPath, { recursive: true });
      }

      console.log(`📂 Section ${sectionIndex}: ${section.title}`);
      console.log(` Folder: ${folderName}`);
      console.log(` Resources: ${section.links.length}\n`);

      let resourceIndex = 1;

      // Process each resource in this section
      for (const link of section.links) {
        const resourceName = sanitizeFilename(link.text);

        // Find matching file by exact or substring match in either direction.
        // NOTE(review): substring matching can pair similarly named resources
        // with the wrong file (e.g. "League Roster" vs "League Roster 2024");
        // confirm the downloaded filenames are distinct enough.
        const matchingFile = existingFiles.find(f => {
          const baseName = f.replace(/\.(pdf|txt)$/, '');
          return (
            baseName === resourceName ||
            baseName.includes(resourceName) ||
            resourceName.includes(baseName)
          );
        });

        if (matchingFile) {
          const oldPath = path.join(RESOURCES_DIR, matchingFile);
          const extension = path.extname(matchingFile);
          const newName = `${sectionIndex.toString().padStart(2, '0')}-${resourceIndex.toString().padStart(2, '0')}-${resourceName}${extension}`;
          const newPath = path.join(folderPath, newName);

          // Move and rename file (existsSync guards against a file already
          // moved by an earlier, overlapping match)
          if (fs.existsSync(oldPath)) {
            fs.renameSync(oldPath, newPath);
            console.log(` ✅ ${resourceIndex}. ${resourceName}${extension}`);
            totalMoved++;
          }
        } else {
          console.log(` ⚠️ ${resourceIndex}. ${resourceName} (not found)`);
        }

        resourceIndex++;
      }

      console.log('');
      sectionIndex++;
    }

    // Move any remaining files to an "Other" folder (re-list the directory:
    // matched files have already been moved out)
    const remainingFiles = fs
      .readdirSync(RESOURCES_DIR)
      .filter(
        f =>
          !f.startsWith('.') &&
          !fs.statSync(path.join(RESOURCES_DIR, f)).isDirectory() &&
          (f.endsWith('.pdf') || f.endsWith('.txt'))
      );

    if (remainingFiles.length > 0) {
      const otherFolder = path.join(RESOURCES_DIR, '99-Other');
      if (!fs.existsSync(otherFolder)) {
        fs.mkdirSync(otherFolder, { recursive: true });
      }

      console.log(
        `📂 Moving ${remainingFiles.length} unmatched files to Other folder\n`
      );

      remainingFiles.forEach((file, index) => {
        const oldPath = path.join(RESOURCES_DIR, file);
        const newName = `99-${(index + 1).toString().padStart(2, '0')}-${file}`;
        const newPath = path.join(otherFolder, newName);
        fs.renameSync(oldPath, newPath);
        console.log(` ✅ ${file}`);
      });
    }

    console.log(`\n🎉 Organization complete!`);
    console.log(`📊 Statistics:`);
    console.log(` Sections created: ${sectionIndex - 1}`);
    console.log(` Files organized: ${totalMoved}`);
    console.log(` Files in Other: ${remainingFiles.length}`);
  } catch (error) {
    console.error('❌ Error:', error.message);
    process.exit(1);
  } finally {
    // Always release the headless browser, even on failure
    await browser.close();
  }
}

// Entry point: errors are handled (and the process exited) inside the function.
organizeResources();
|
||||
389
code/utils/scrape-pokemon-resources.js
Normal file
389
code/utils/scrape-pokemon-resources.js
Normal file
@@ -0,0 +1,389 @@
|
||||
/**
|
||||
* Pokémon Play! Resources Scraper
|
||||
*
|
||||
* Downloads official tournament rules, resources, and documentation from pokemon.com
|
||||
* - PDFs: Downloads directly
|
||||
* - Videos: Saves video URLs to text files
|
||||
* - Web pages: Extracts and saves text content
|
||||
*
|
||||
* Usage:
|
||||
* node code/utils/scrape-pokemon-resources.js
|
||||
* npm run scrape:pokemon
|
||||
*
|
||||
* Output: docs/projects/pokemon-professor/Pokemon Rules & Resources/
|
||||
*/
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import https from 'https';
|
||||
import http from 'http';
|
||||
|
||||
// Add stealth plugin to avoid bot detection
puppeteer.use(StealthPlugin());

// ESM equivalents of CommonJS __filename/__dirname
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Configuration
// Page listing all tournament rules/resources to scrape
const BASE_URL =
  'https://www.pokemon.com/us/play-pokemon/about/tournaments-rules-and-resources';
// Destination directory for downloaded PDFs / extracted text
const OUTPUT_DIR = path.resolve(
  __dirname,
  '../../docs/projects/pokemon-professor/Pokemon Rules & Resources'
);

// Target resource names from the page. These are matched against the visible
// link text scraped from BASE_URL, so each entry must match the site's
// wording exactly (including punctuation and accented characters).
const TARGET_RESOURCES = [
  // Rules & Resources for All
  'Play! Pokémon Terms of Use',
  'Play! Pokémon Standards of Conduct',
  'Play! Pokémon Inclusion Policy',
  'Play! Pokémon Accessibility Policy',
  'Play! Pokémon Trainer Username and Team Name Policy',
  'Play! Pokémon Premier Events Sponsorship Policy',
  'Play! Pokémon Tournament Rules Handbook',
  'Play! Pokémon COVID-19 Protocols',
  'Play! Pokémon Attire and Cosplay Policy',
  'Play! Pokémon Penalty Guidelines',

  // Pokémon TCG Rules & Resources
  'Pokémon TCG Rulebook',
  'Play! Pokémon Deck List (8.5x11)',
  'Play! Pokémon Deck List (A4)',
  'TCG Errata',
  'Pokémon TCG Banned Card List',
  'Mega Evolution—Phantasmal Flames Banned List and Rule Changes Announcement',
  'Pokémon TCG Promo Card Legality Status',
  'Pokémon TCG Alternative Play Handbook',
  'Pokémon TCG Tournament Handbook',

  // Video Game Rules & Resources
  'Play! Pokémon Video Game Championships Tournament Handbook',
  'Pokémon Video Game Team List',

  // Pokémon GO Rules & Resources
  'Play! Pokémon Pokémon GO Tournament Handbook',
  'Pokémon GO Team List',
  'Play! Pokémon Pokémon GO Championship Series Banned Pokémon List',
  'Organizing Pokémon GO Events',

  // Pokémon UNITE Rules & Resources
  'Pokémon UNITE Championship Series Handbook',

  // Pokémon League Rules & Resources
  'Play! Pokémon Store Handbook',
  'Play! Pokémon League Challenges, Cups, and Prerelease Guide',
  'League Roster',
  'League Flyer',

  // Pokémon Club Rules & Resources
  'Pokémon Activity Sheets',

  // Further Resources for Players
  'World Championships Battle Dictionary',
  'Play! Pokémon Scholarship Program Terms and Conditions',
  'Championship Event Awards Disbursement Information',

  // Training Videos
  'League Management Demos',
  'Tournament Software and Reporting Events',
  'Championship Series Reporting',
  'TOM Training Videos',
  'Tools Overview',
  'Installation and Set-up',
  'Setting Up Your Tournament',
  'Tournament Detail Verification',
  'Running & Completing the Tournament',
  'Reporting Matches',
  'Adding Players'
];
|
||||
|
||||
/**
 * Clean filename for filesystem use.
 * @param {string} name - Original name
 * @returns {string} Safe filename: reserved characters replaced by '-',
 *   whitespace collapsed, ends trimmed
 */
function sanitizeFilename(name) {
  let safe = name.replace(/[<>:"/\\|?*]/g, '-');
  safe = safe.replace(/\s+/g, ' ');
  return safe.trim();
}
|
||||
|
||||
/**
 * Download a file from a URL using the page's own browser context, so the
 * session's cookies and headers are sent automatically.
 *
 * (The previous version also built a cookie-header string from page.cookies(),
 * but it was never used — fetch with credentials: 'include' already sends the
 * session cookies — so that dead code has been removed.)
 *
 * @param {Page} page - Puppeteer page
 * @param {string} url - File URL
 * @param {string} filepath - Destination path
 * @returns {Promise<void>}
 * @throws {Error} If the HTTP request fails or the payload is not a PDF
 */
async function downloadFile(page, url, filepath) {
  // Fetch inside the page context; the result comes back as a plain number
  // array because page.evaluate() serializes return values as JSON.
  const buffer = await page.evaluate(async downloadUrl => {
    const response = await fetch(downloadUrl, {
      method: 'GET',
      credentials: 'include'
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const arrayBuffer = await response.arrayBuffer();
    return Array.from(new Uint8Array(arrayBuffer));
  }, url);

  const bufferData = Buffer.from(buffer);

  // Verify it's actually a PDF — bot-challenge pages can return HTML with
  // a 200 status, which would otherwise be saved as a corrupt .pdf.
  const header = bufferData.slice(0, 5).toString();
  if (!header.startsWith('%PDF')) {
    throw new Error(`Downloaded file is not a PDF (got: ${header})`);
  }

  fs.writeFileSync(filepath, bufferData);
}
|
||||
|
||||
/**
 * Extract text content from a web page.
 *
 * NOTE(review): this mutates the live page — script/style/nav/footer/header
 * elements are removed from the DOM before extracting text, so any later
 * scraping on the same page object sees the stripped document. Confirm the
 * caller always navigates away or discards the page afterwards.
 *
 * @param {Page} page - Puppeteer page
 * @returns {Promise<string>} Page text content
 */
async function extractPageText(page) {
  return await page.evaluate(() => {
    // Remove script and style elements (plus chrome like nav/footer/header)
    // so innerText reflects only readable content. Destructive: alters the DOM.
    const scripts = document.querySelectorAll(
      'script, style, nav, footer, header'
    );
    scripts.forEach(el => el.remove());

    // Get main content: prefer a semantic container, fall back to the body
    const main =
      document.querySelector('main, article, .content, #content') ||
      document.body;
    return main.innerText.trim();
  });
}
|
||||
|
||||
/**
|
||||
* Main scraping function
|
||||
*/
|
||||
/**
 * Main scraper entry point.
 *
 * Launches a headless Chromium via puppeteer, loads BASE_URL, collects every
 * candidate link on the page, then walks TARGET_RESOURCES and saves each match
 * into OUTPUT_DIR as either:
 *   - a downloaded PDF (links ending in .pdf),
 *   - a "<name> - Video URL.txt" stub (YouTube/Vimeo/video links), or
 *   - an extracted plain-text dump of the linked page.
 *
 * Side effects: creates OUTPUT_DIR, writes debug artifacts
 * (debug-screenshot.png, debug-page-source.html) plus one file per resource,
 * and logs progress to the console. Sets `process.exitCode = 1` on fatal error.
 *
 * @returns {Promise<void>}
 */
async function scrapeResources() {
  console.log('🚀 Starting Pokémon Play! Resources Scraper');
  console.log(`📁 Output directory: ${OUTPUT_DIR}\n`);

  // Create output directory
  if (!fs.existsSync(OUTPUT_DIR)) {
    fs.mkdirSync(OUTPUT_DIR, { recursive: true });
    console.log('✅ Created output directory\n');
  }

  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled',
      '--disable-web-security',
      '--disable-features=IsolateOrigins,site-per-process'
    ]
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Set realistic user agent so the site serves the normal desktop page.
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    );

    // Set extra headers to appear more like a real browser
    await page.setExtraHTTPHeaders({
      'Accept-Language': 'en-US,en;q=0.9',
      'Accept-Encoding': 'gzip, deflate, br',
      Accept:
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
    });

    // Navigate to main page
    console.log('🌐 Loading main page...');
    await page.goto(BASE_URL, { waitUntil: 'networkidle0', timeout: 90000 });

    // Wait for content to load - try waiting for a specific element
    console.log('⏳ Waiting for content to render...');
    try {
      await page.waitForSelector('a[href*=".pdf"], .resource, article', {
        timeout: 10000
      });
    } catch (e) {
      // Best-effort: the page may render links without these selectors.
      console.log(
        '⚠️ Timeout waiting for specific selectors, continuing anyway...'
      );
    }

    // Extra settle time for late JS-rendered content.
    await new Promise(resolve => setTimeout(resolve, 5000));

    console.log('✅ Page loaded\n');

    // Debug: Take a screenshot
    await page.screenshot({
      path: path.join(OUTPUT_DIR, 'debug-screenshot.png'),
      fullPage: true
    });
    console.log('📸 Screenshot saved for debugging\n');

    // Debug: Dump HTML content
    const html = await page.content();
    fs.writeFileSync(path.join(OUTPUT_DIR, 'debug-page-source.html'), html);
    console.log('📄 HTML source saved for debugging\n');

    // Get all links on the page with multiple strategies
    const links = await page.evaluate(() => {
      const anchors = Array.from(document.querySelectorAll('a'));
      const allLinks = anchors
        .map(a => ({
          text: a.innerText.trim(),
          href: a.href,
          title: a.title || '',
          ariaLabel: a.getAttribute('aria-label') || ''
        }))
        .filter(
          link =>
            (link.text || link.title || link.ariaLabel) &&
            link.href &&
            !link.href.startsWith('javascript:') &&
            !link.href.includes('#')
        );

      // Also try to get download links specifically
      const downloadLinks = Array.from(
        document.querySelectorAll('[download], a[href*=".pdf"]')
      ).map(a => ({
        text: a.innerText.trim() || a.getAttribute('download') || a.title,
        href: a.href
      }));

      // Merge and dedupe by href.
      return [...allLinks, ...downloadLinks].filter(
        (link, index, self) =>
          index === self.findIndex(l => l.href === link.href)
      );
    });

    console.log(`📋 Found ${links.length} total links on page`);

    // Debug: Show first 10 links
    if (links.length > 0) {
      console.log('\n📝 Sample links found:');
      links.slice(0, 10).forEach((link, i) => {
        // Download-only anchors can have empty text; fall back to the href
        // so the debug output never prints "undefined"/blank labels.
        const label = link.text || link.href;
        console.log(`  ${i + 1}. ${label.substring(0, 60)}...`);
      });
      console.log('');
    } else {
      console.log(
        '⚠️ No links found - page may require different loading strategy\n'
      );
    }

    // Process each target resource
    let processed = 0;
    let downloaded = 0;
    let skipped = 0;

    for (const targetName of TARGET_RESOURCES) {
      // Find matching link (case-insensitive, fuzzy match). Links with no
      // visible text are skipped: `target.includes('')` is always true, so
      // an empty-text link would otherwise match every single target.
      const target = targetName.toLowerCase();
      const link = links.find(l => {
        const linkText = (l.text || '').toLowerCase();
        return (
          linkText.length > 0 &&
          (linkText.includes(target) || target.includes(linkText))
        );
      });

      if (!link) {
        console.log(`⚠️ Could not find link for: ${targetName}`);
        skipped++;
        continue;
      }

      processed++;
      const safeFilename = sanitizeFilename(targetName);

      try {
        // Check if it's a PDF
        if (link.href.toLowerCase().endsWith('.pdf')) {
          const filepath = path.join(OUTPUT_DIR, `${safeFilename}.pdf`);
          console.log(`📥 Downloading PDF: ${targetName}`);
          console.log(`   URL: ${link.href}`);
          await downloadFile(page, link.href, filepath);
          console.log(`   ✅ Saved: ${safeFilename}.pdf\n`);
          downloaded++;
        }
        // Check if it's a video link (YouTube, Vimeo, etc.)
        else if (
          link.href.includes('youtube.com') ||
          link.href.includes('youtu.be') ||
          link.href.includes('vimeo.com') ||
          link.href.includes('video')
        ) {
          const filepath = path.join(
            OUTPUT_DIR,
            `${safeFilename} - Video URL.txt`
          );
          console.log(`🎥 Saving video URL: ${targetName}`);
          fs.writeFileSync(
            filepath,
            `${targetName}\n\nVideo URL: ${link.href}\n`
          );
          console.log(`   ✅ Saved: ${safeFilename} - Video URL.txt\n`);
          downloaded++;
        }
        // Otherwise, extract page text
        else {
          console.log(`📄 Extracting text from: ${targetName}`);
          console.log(`   URL: ${link.href}`);

          const contentPage = await browser.newPage();
          let text;
          try {
            await contentPage.goto(link.href, {
              waitUntil: 'networkidle2',
              timeout: 60000
            });
            text = await extractPageText(contentPage);
          } finally {
            // Always close the tab, even when navigation or extraction
            // throws, so failed targets don't leak pages for the whole run.
            await contentPage.close();
          }

          const filepath = path.join(OUTPUT_DIR, `${safeFilename}.txt`);
          fs.writeFileSync(
            filepath,
            `${targetName}\n\nSource: ${link.href}\n\n${text}\n`
          );
          console.log(`   ✅ Saved: ${safeFilename}.txt\n`);
          downloaded++;
        }
      } catch (error) {
        console.error(
          `   ❌ Error processing ${targetName}: ${error.message}\n`
        );
        skipped++;
      }
    }

    console.log('🎉 Scraping complete!');
    console.log(`📊 Statistics:`);
    console.log(`   Total targets: ${TARGET_RESOURCES.length}`);
    console.log(`   Processed: ${processed}`);
    console.log(`   Downloaded: ${downloaded}`);
    console.log(`   Skipped: ${skipped}`);
  } catch (error) {
    console.error('❌ Fatal error:', error.message);
    // Use exitCode instead of process.exit(1): an immediate exit would skip
    // the `finally` block below and leak the Chromium child process.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Run scraper. Attach a rejection handler so an unexpected failure escaping
// the function (e.g. browser.close() throwing in its finally block) cannot
// become an unhandled promise rejection, which crashes modern Node outright.
scrapeResources().catch(error => {
  console.error('❌ Unhandled error:', error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user