chore: integrate old MemoryPalace files and configure auto-commit
This commit is contained in:
@@ -28,33 +28,33 @@
|
||||
*
|
||||
* Currently not working due to reCAPTCHA on the P!P site
|
||||
*/
|
||||
const puppeteer = require("puppeteer");
|
||||
const cheerio = require("cheerio");
|
||||
const puppeteer = require('puppeteer');
|
||||
const cheerio = require('cheerio');
|
||||
|
||||
async function scrapeWebsite(url) {
|
||||
// Launch Puppeteer
|
||||
const browser = await puppeteer.launch({
|
||||
headless: false, // Run with a visible browser window (not headless)
|
||||
args: [
|
||||
"--no-sandbox",
|
||||
"--disable-setuid-sandbox",
|
||||
"--disable-dev-shm-usage",
|
||||
"--disable-accelerated-2d-canvas",
|
||||
"--disable-gpu",
|
||||
"--window-size=1920x1080",
|
||||
],
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-accelerated-2d-canvas',
|
||||
'--disable-gpu',
|
||||
'--window-size=1920x1080'
|
||||
]
|
||||
});
|
||||
const page = await browser.newPage();
|
||||
// Set user agent to mimic a real browser
|
||||
await page.setUserAgent(
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
||||
);
|
||||
|
||||
// Set viewport to mimic a real browser
|
||||
await page.setViewport({ width: 1920, height: 1080 });
|
||||
|
||||
// Navigate to the URL
|
||||
await page.goto(url, { waitUntil: "networkidle2" });
|
||||
await page.goto(url, { waitUntil: 'networkidle2' });
|
||||
|
||||
// Simulate human-like interactions
|
||||
await page.waitForTimeout(2000); // Wait for 2 seconds
|
||||
@@ -70,41 +70,41 @@ async function scrapeWebsite(url) {
|
||||
const $ = cheerio.load(content);
|
||||
|
||||
// Find all table elements
|
||||
const tables = $("table");
|
||||
const tables = $('table');
|
||||
const data = [];
|
||||
|
||||
// Loop through each table
|
||||
tables.each((index, table) => {
|
||||
const headers = [];
|
||||
const rows = $(table).find("tr");
|
||||
const rows = $(table).find('tr');
|
||||
|
||||
// Check if the first row contains the headers Date, Venue, and Location
|
||||
const firstRow = rows.first();
|
||||
firstRow.find("tr").each((i, th) => {
|
||||
firstRow.find('tr').each((i, th) => {
|
||||
headers.push($(th).text().trim().toLowerCase());
|
||||
});
|
||||
|
||||
if (
|
||||
headers.includes("date") &&
|
||||
headers.includes("venue") &&
|
||||
headers.includes("location")
|
||||
headers.includes('date') &&
|
||||
headers.includes('venue') &&
|
||||
headers.includes('location')
|
||||
) {
|
||||
// Loop through the remaining rows and extract data
|
||||
rows.slice(1).each((i, row) => {
|
||||
const cells = $(row).find("td");
|
||||
const cells = $(row).find('td');
|
||||
const rowData = {};
|
||||
|
||||
cells.each((j, cell) => {
|
||||
const header = headers[j];
|
||||
const cellText = $(cell).text().trim();
|
||||
|
||||
if (header === "date") {
|
||||
const dates = cellText.split(" - ");
|
||||
if (header === 'date') {
|
||||
const dates = cellText.split(' - ');
|
||||
rowData.startDate = dates[0];
|
||||
rowData.endDate = dates[1] || dates[0];
|
||||
} else if (header === "venue") {
|
||||
} else if (header === 'venue') {
|
||||
rowData.venue = cellText;
|
||||
} else if (header === "location") {
|
||||
} else if (header === 'location') {
|
||||
rowData.location = cellText;
|
||||
}
|
||||
});
|
||||
@@ -119,7 +119,7 @@ async function scrapeWebsite(url) {
|
||||
|
||||
// Example usage
|
||||
const url =
|
||||
"https://www.pokemon.com/us/play-pokemon/pokemon-events/championship-series/2025/regional-special-championships";
|
||||
'https://www.pokemon.com/us/play-pokemon/pokemon-events/championship-series/2025/regional-special-championships';
|
||||
scrapeWebsite(url)
|
||||
.then((data) => console.log(data))
|
||||
.catch((error) => console.error(error));
|
||||
.then(data => console.log(data))
|
||||
.catch(error => console.error(error));
|
||||
|
||||
Reference in New Issue
Block a user