chore: integrate old MemoryPalace files and configure auto-commit

This commit is contained in:
2026-01-26 17:19:26 -05:00
parent 23cb27503e
commit e763f5a9d5
24 changed files with 2032 additions and 34 deletions

View File

@@ -28,33 +28,33 @@
*
* Currently not working due to reCAPTCHA on the P!P site
*/
const puppeteer = require("puppeteer");
const cheerio = require("cheerio");
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
async function scrapeWebsite(url) {
// Launch Puppeteer
const browser = await puppeteer.launch({
headless: false, // Run in headless mode
args: [
"--no-sandbox",
"--disable-setuid-sandbox",
"--disable-dev-shm-usage",
"--disable-accelerated-2d-canvas",
"--disable-gpu",
"--window-size=1920x1080",
],
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-accelerated-2d-canvas',
'--disable-gpu',
'--window-size=1920x1080'
]
});
const page = await browser.newPage();
// Set user agent to mimic a real browser
await page.setUserAgent(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
);
// Set viewport to mimic a real browser
await page.setViewport({ width: 1920, height: 1080 });
// Navigate to the URL
await page.goto(url, { waitUntil: "networkidle2" });
await page.goto(url, { waitUntil: 'networkidle2' });
// Simulate human-like interactions
await page.waitForTimeout(2000); // Wait for 2 seconds
@@ -70,41 +70,41 @@ async function scrapeWebsite(url) {
const $ = cheerio.load(content);
// Find all table elements
const tables = $("table");
const tables = $('table');
const data = [];
// Loop through each table
tables.each((index, table) => {
const headers = [];
const rows = $(table).find("tr");
const rows = $(table).find('tr');
// Check if the first row contains the headers Date, Venue, and Location
const firstRow = rows.first();
firstRow.find("tr").each((i, th) => {
firstRow.find('tr').each((i, th) => {
headers.push($(th).text().trim().toLowerCase());
});
if (
headers.includes("date") &&
headers.includes("venue") &&
headers.includes("location")
headers.includes('date') &&
headers.includes('venue') &&
headers.includes('location')
) {
// Loop through the remaining rows and extract data
rows.slice(1).each((i, row) => {
const cells = $(row).find("td");
const cells = $(row).find('td');
const rowData = {};
cells.each((j, cell) => {
const header = headers[j];
const cellText = $(cell).text().trim();
if (header === "date") {
const dates = cellText.split(" - ");
if (header === 'date') {
const dates = cellText.split(' - ');
rowData.startDate = dates[0];
rowData.endDate = dates[1] || dates[0];
} else if (header === "venue") {
} else if (header === 'venue') {
rowData.venue = cellText;
} else if (header === "location") {
} else if (header === 'location') {
rowData.location = cellText;
}
});
@@ -119,7 +119,7 @@ async function scrapeWebsite(url) {
// Example usage
const url =
"https://www.pokemon.com/us/play-pokemon/pokemon-events/championship-series/2025/regional-special-championships";
'https://www.pokemon.com/us/play-pokemon/pokemon-events/championship-series/2025/regional-special-championships';
scrapeWebsite(url)
.then((data) => console.log(data))
.catch((error) => console.error(error));
.then(data => console.log(data))
.catch(error => console.error(error));