Game scraper didn't count the updated URL; better use of promises

pull/33/head
MillenniumEarl 2020-10-20 22:47:36 +02:00
parent be2e65e665
commit 5293ae3f1b
4 changed files with 50 additions and 29 deletions

View File

@ -480,9 +480,11 @@ async function loginF95(browser, username, password) {
await page.goto(constURLs.F95_LOGIN_URL); // Go to login page await page.goto(constURLs.F95_LOGIN_URL); // Go to login page
// Explicitly wait for the required items to load // Explicitly wait for the required items to load
await page.waitForSelector(selectors.USERNAME_INPUT); await Promise.all([
await page.waitForSelector(selectors.PASSWORD_INPUT); page.waitForSelector(selectors.USERNAME_INPUT),
await page.waitForSelector(selectors.LOGIN_BUTTON); page.waitForSelector(selectors.PASSWORD_INPUT),
page.waitForSelector(selectors.LOGIN_BUTTON),
]);
await page.type(selectors.USERNAME_INPUT, username); // Insert username await page.type(selectors.USERNAME_INPUT, username); // Insert username
await page.type(selectors.PASSWORD_INPUT, password); // Insert password await page.type(selectors.PASSWORD_INPUT, password); // Insert password
@ -549,10 +551,12 @@ async function getUserWatchedGameThreads(browser) {
await page.waitForSelector(selectors.WATCHED_THREAD_FILTER_POPUP_BUTTON); await page.waitForSelector(selectors.WATCHED_THREAD_FILTER_POPUP_BUTTON);
// Show the popup // Show the popup
await page.click(selectors.WATCHED_THREAD_FILTER_POPUP_BUTTON); await Promise.all([
await page.waitForSelector(selectors.UNREAD_THREAD_CHECKBOX); page.click(selectors.WATCHED_THREAD_FILTER_POPUP_BUTTON),
await page.waitForSelector(selectors.ONLY_GAMES_THREAD_OPTION); page.waitForSelector(selectors.UNREAD_THREAD_CHECKBOX),
await page.waitForSelector(selectors.FILTER_THREADS_BUTTON); page.waitForSelector(selectors.ONLY_GAMES_THREAD_OPTION),
page.waitForSelector(selectors.FILTER_THREADS_BUTTON),
]);
// Set the filters // Set the filters
await page.evaluate( await page.evaluate(
@ -561,11 +565,12 @@ async function getUserWatchedGameThreads(browser) {
selectors.UNREAD_THREAD_CHECKBOX selectors.UNREAD_THREAD_CHECKBOX
); // Also read the threads already read ); // Also read the threads already read
await page.click(selectors.ONLY_GAMES_THREAD_OPTION);
// Filter the threads // Filter the threads
await page.click(selectors.FILTER_THREADS_BUTTON); await Promise.all([
await page.waitForSelector(selectors.WATCHED_THREAD_URLS); page.click(selectors.ONLY_GAMES_THREAD_OPTION),
page.click(selectors.FILTER_THREADS_BUTTON),
page.waitForSelector(selectors.WATCHED_THREAD_URLS),
]);
// Get the threads urls // Get the threads urls
const urls = []; const urls = [];

View File

@ -10,7 +10,7 @@ const selectors = require("./constants/css-selectors.js");
const { preparePage } = require("./puppeteer-helper.js"); const { preparePage } = require("./puppeteer-helper.js");
const GameDownload = require("./classes/game-download.js"); const GameDownload = require("./classes/game-download.js");
const GameInfo = require("./classes/game-info.js"); const GameInfo = require("./classes/game-info.js");
const { isStringAValidURL, isF95URL, urlExists } = require("./urls-helper.js"); const urlsHelper = require("./urls-helper.js");
/** /**
* @protected * @protected
@ -24,8 +24,8 @@ module.exports.getGameInfo = async function (browser, url) {
if (shared.debug) console.log("Obtaining game info"); if (shared.debug) console.log("Obtaining game info");
// Verify the correctness of the URL // Verify the correctness of the URL
if (!isF95URL(url)) throw new Error(url + " is not a valid F95Zone URL"); if (!urlsHelper.isF95URL(url)) throw new Error(url + " is not a valid F95Zone URL");
const exists = await urlExists(url); const exists = await urlsHelper.urlExists(url);
if (!exists) return null; if (!exists) return null;
const page = await preparePage(browser); // Set new isolated page const page = await preparePage(browser); // Set new isolated page
@ -40,26 +40,28 @@ module.exports.getGameInfo = async function (browser, url) {
const title = getGameTitle(page); const title = getGameTitle(page);
const author = getGameAuthor(page); const author = getGameAuthor(page);
const tags = getGameTags(page); const tags = getGameTags(page);
const previewSource = getGamePreviewSource(page); const redirectUrl = urlsHelper.getUrlRedirect(url);
//let downloadData = getGameDownloadLink(page);
info = await parsePrefixes(page, info); // Fill status/engines/isMod info = await parsePrefixes(page, info); // Fill status/engines/isMod
const structuredText = await getMainPostStructuredText(page); const structuredText = await getMainPostStructuredText(page);
const overview = getOverview(structuredText, info.isMod); const overview = getOverview(structuredText, info.isMod);
const parsedInfos = parseConversationPage(structuredText); const parsedInfos = parseConversationPage(structuredText);
const previewSource = getGamePreviewSource(page);
const changelog = getLastChangelog(page); const changelog = getLastChangelog(page);
// Fill in the GameInfo element with the information obtained // Fill in the GameInfo element with the information obtained
info.name = await title; info.name = await title;
info.author = await author; info.author = await author;
info.overview = overview;
info.tags = await tags; info.tags = await tags;
info.f95url = url; info.f95url = await redirectUrl;
info.overview = overview;
info.version = info.isMod ? parsedInfos.MOD_VERSION : parsedInfos.VERSION; info.version = info.isMod ? parsedInfos.MOD_VERSION : parsedInfos.VERSION;
info.lastUpdate = info.isMod info.lastUpdate = info.isMod
? parsedInfos.UPDATED ? parsedInfos.UPDATED
: parsedInfos.THREAD_UPDATED; : parsedInfos.THREAD_UPDATED;
info.previewSource = await previewSource; info.previewSource = await previewSource;
info.changelog = (await changelog) || "Unknown changelog"; info.changelog = (await changelog) || "Unknown changelog";
//let downloadData = getGameDownloadLink(page);
//info.downloadInfo = await downloadData; //info.downloadInfo = await downloadData;
/* Downloading games without going directly to /* Downloading games without going directly to
* the platform appears to be prohibited by * the platform appears to be prohibited by
@ -207,7 +209,7 @@ async function getGamePreviewSource(page) {
); );
// Check if the URL is valid // Check if the URL is valid
return isStringAValidURL(src) ? src : null; return urlsHelper.isStringAValidURL(src) ? src : null;
} }
/** /**
@ -424,7 +426,7 @@ function extractGameHostingData(platform, text) {
endIndex = tag.indexOf(HREF_END, startIndex); endIndex = tag.indexOf(HREF_END, startIndex);
const link = tag.substring(startIndex, endIndex); const link = tag.substring(startIndex, endIndex);
if (isStringAValidURL(link)) { if (urlsHelper.isStringAValidURL(link)) {
const gd = new GameDownload(); const gd = new GameDownload();
gd.hosting = hosting.toUpperCase(); gd.hosting = hosting.toUpperCase();
gd.link = link; gd.link = link;

View File

@ -15,7 +15,7 @@ const { isF95URL } = require("./urls-helper.js");
* Search the F95Zone portal to find possible conversations regarding the game you are looking for. * Search the F95Zone portal to find possible conversations regarding the game you are looking for.
* @param {puppeteer.Browser} browser Browser object used for navigation * @param {puppeteer.Browser} browser Browser object used for navigation
* @param {String} gamename Name of the game to search for * @param {String} gamename Name of the game to search for
* @returns {Promise<String[]>} List of URL of possible games obtained from the preliminary research on the F95 portal * @returns {Promise<String[]>} List of URL of possible games obtained from the preliminary research on the F95 portal
*/ */
module.exports.getSearchGameResults = async function (browser, gamename) { module.exports.getSearchGameResults = async function (browser, gamename) {
if (shared.debug) console.log("Searching " + gamename + " on F95Zone"); if (shared.debug) console.log("Searching " + gamename + " on F95Zone");
@ -27,13 +27,15 @@ module.exports.getSearchGameResults = async function (browser, gamename) {
}); // Go to the search form and wait for it }); // Go to the search form and wait for it
// Explicitly wait for the required items to load // Explicitly wait for the required items to load
await page.waitForSelector(selectors.SEARCH_FORM_TEXTBOX); await Promise.all([
await page.waitForSelector(selectors.TITLE_ONLY_CHECKBOX); page.waitForSelector(selectors.SEARCH_FORM_TEXTBOX),
await page.waitForSelector(selectors.SEARCH_BUTTON); page.waitForSelector(selectors.TITLE_ONLY_CHECKBOX),
page.waitForSelector(selectors.SEARCH_BUTTON)
]);
await page.type(selectors.SEARCH_FORM_TEXTBOX, gamename); // Type the game we desire await page.type(selectors.SEARCH_FORM_TEXTBOX, gamename); // Type the game we desire
await page.click(selectors.TITLE_ONLY_CHECKBOX); // Select only the thread with the game in the titles
await Promise.all([ await Promise.all([
page.click(selectors.TITLE_ONLY_CHECKBOX), // Select only the thread with the game in the titles
page.click(selectors.SEARCH_BUTTON), // Execute search page.click(selectors.SEARCH_BUTTON), // Execute search
page.waitForNavigation({ page.waitForNavigation({
waitUntil: shared.WAIT_STATEMENT, waitUntil: shared.WAIT_STATEMENT,

View File

@ -35,7 +35,7 @@ module.exports.isStringAValidURL = function (url) {
}; };
/** /**
* @public * @protected
* Check if a particular URL is valid and reachable on the web. * Check if a particular URL is valid and reachable on the web.
* @param {String} url URL to check * @param {String} url URL to check
* @param {Boolean} checkRedirect If true, the function will consider redirects a violation and return false * @param {Boolean} checkRedirect If true, the function will consider redirects a violation and return false
@ -52,9 +52,21 @@ module.exports.urlExists = async function (url, checkRedirect) {
if (!valid) return false; if (!valid) return false;
if (checkRedirect) { if (checkRedirect) {
if (response.url === url) valid = true; let redirectUrl = await exports.getUrlRedirect(url);
if (redirectUrl === url) valid = true;
else valid = false; else valid = false;
} }
return valid; return valid;
}; };
/**
 * @protected
 * Issue a HEAD request to the given URL and report the URL carried by the response.
 * @param {String} url URL to check for redirect
 * @returns {Promise<String>} Redirect URL or the passed URL
 */
module.exports.getUrlRedirect = async function (url) {
  // Only the headers are requested; the `url` field of the response is all we need
  const { url: resolvedUrl } = await ky.head(url);
  return resolvedUrl;
};