From 5293ae3f1b559df3eaab5c2b64c3c35a57fc2c1a Mon Sep 17 00:00:00 2001 From: MillenniumEarl Date: Tue, 20 Oct 2020 22:47:36 +0200 Subject: [PATCH] Game scraper didn't count update URL, better use of promises --- app/index.js | 31 ++++++++++++++++++------------- app/scripts/game-scraper.js | 20 +++++++++++--------- app/scripts/game-searcher.js | 12 +++++++----- app/scripts/urls-helper.js | 16 ++++++++++++++-- 4 files changed, 50 insertions(+), 29 deletions(-) diff --git a/app/index.js b/app/index.js index 14c472c..52cfbe9 100644 --- a/app/index.js +++ b/app/index.js @@ -480,10 +480,12 @@ async function loginF95(browser, username, password) { await page.goto(constURLs.F95_LOGIN_URL); // Go to login page // Explicitly wait for the required items to load - await page.waitForSelector(selectors.USERNAME_INPUT); - await page.waitForSelector(selectors.PASSWORD_INPUT); - await page.waitForSelector(selectors.LOGIN_BUTTON); - + await Promise.all([ + page.waitForSelector(selectors.USERNAME_INPUT), + page.waitForSelector(selectors.PASSWORD_INPUT), + page.waitForSelector(selectors.LOGIN_BUTTON), + ]); + await page.type(selectors.USERNAME_INPUT, username); // Insert username await page.type(selectors.PASSWORD_INPUT, password); // Insert password await Promise.all([ @@ -549,10 +551,12 @@ async function getUserWatchedGameThreads(browser) { await page.waitForSelector(selectors.WATCHED_THREAD_FILTER_POPUP_BUTTON); // Show the popup - await page.click(selectors.WATCHED_THREAD_FILTER_POPUP_BUTTON); - await page.waitForSelector(selectors.UNREAD_THREAD_CHECKBOX); - await page.waitForSelector(selectors.ONLY_GAMES_THREAD_OPTION); - await page.waitForSelector(selectors.FILTER_THREADS_BUTTON); + await Promise.all([ + page.click(selectors.WATCHED_THREAD_FILTER_POPUP_BUTTON), + page.waitForSelector(selectors.UNREAD_THREAD_CHECKBOX), + page.waitForSelector(selectors.ONLY_GAMES_THREAD_OPTION), + page.waitForSelector(selectors.FILTER_THREADS_BUTTON), + ]); // Set the filters await 
page.evaluate( @@ -560,12 +564,13 @@ async function getUserWatchedGameThreads(browser) { document.querySelector(selector).removeAttribute("checked"), selectors.UNREAD_THREAD_CHECKBOX ); // Also read the threads already read - - await page.click(selectors.ONLY_GAMES_THREAD_OPTION); - + // Filter the threads - await page.click(selectors.FILTER_THREADS_BUTTON); - await page.waitForSelector(selectors.WATCHED_THREAD_URLS); + await Promise.all([ + page.click(selectors.ONLY_GAMES_THREAD_OPTION), + page.click(selectors.FILTER_THREADS_BUTTON), + page.waitForSelector(selectors.WATCHED_THREAD_URLS), + ]); // Get the threads urls const urls = []; diff --git a/app/scripts/game-scraper.js b/app/scripts/game-scraper.js index 7cd2bb8..b7db258 100644 --- a/app/scripts/game-scraper.js +++ b/app/scripts/game-scraper.js @@ -10,7 +10,7 @@ const selectors = require("./constants/css-selectors.js"); const { preparePage } = require("./puppeteer-helper.js"); const GameDownload = require("./classes/game-download.js"); const GameInfo = require("./classes/game-info.js"); -const { isStringAValidURL, isF95URL, urlExists } = require("./urls-helper.js"); +const urlsHelper = require("./urls-helper.js"); /** * @protected @@ -24,8 +24,8 @@ module.exports.getGameInfo = async function (browser, url) { if (shared.debug) console.log("Obtaining game info"); // Verify the correctness of the URL - if (!isF95URL(url)) throw new Error(url + " is not a valid F95Zone URL"); - const exists = await urlExists(url); + if (!urlsHelper.isF95URL(url)) throw new Error(url + " is not a valid F95Zone URL"); + const exists = await urlsHelper.urlExists(url); if (!exists) return null; const page = await preparePage(browser); // Set new isolated page @@ -40,26 +40,28 @@ module.exports.getGameInfo = async function (browser, url) { const title = getGameTitle(page); const author = getGameAuthor(page); const tags = getGameTags(page); - const previewSource = getGamePreviewSource(page); - //let downloadData = 
getGameDownloadLink(page); + const redirectUrl = urlsHelper.getUrlRedirect(url); info = await parsePrefixes(page, info); // Fill status/engines/isMod const structuredText = await getMainPostStructuredText(page); const overview = getOverview(structuredText, info.isMod); const parsedInfos = parseConversationPage(structuredText); + const previewSource = getGamePreviewSource(page); const changelog = getLastChangelog(page); // Fill in the GameInfo element with the information obtained info.name = await title; info.author = await author; - info.overview = overview; info.tags = await tags; - info.f95url = url; + info.f95url = await redirectUrl; + info.overview = overview; info.version = info.isMod ? parsedInfos.MOD_VERSION : parsedInfos.VERSION; info.lastUpdate = info.isMod ? parsedInfos.UPDATED : parsedInfos.THREAD_UPDATED; info.previewSource = await previewSource; info.changelog = (await changelog) || "Unknown changelog"; + + //let downloadData = getGameDownloadLink(page); //info.downloadInfo = await downloadData; /* Downloading games without going directly to * the platform appears to be prohibited by @@ -207,7 +209,7 @@ async function getGamePreviewSource(page) { ); // Check if the URL is valid - return isStringAValidURL(src) ? src : null; + return urlsHelper.isStringAValidURL(src) ? src : null; } /** @@ -424,7 +426,7 @@ function extractGameHostingData(platform, text) { endIndex = tag.indexOf(HREF_END, startIndex); const link = tag.substring(startIndex, endIndex); - if (isStringAValidURL(link)) { + if (urlsHelper.isStringAValidURL(link)) { const gd = new GameDownload(); gd.hosting = hosting.toUpperCase(); gd.link = link; diff --git a/app/scripts/game-searcher.js b/app/scripts/game-searcher.js index c60bb77..89fe6cb 100644 --- a/app/scripts/game-searcher.js +++ b/app/scripts/game-searcher.js @@ -15,7 +15,7 @@ const { isF95URL } = require("./urls-helper.js"); * Search the F95Zone portal to find possible conversations regarding the game you are looking for. 
* @param {puppeteer.Browser} browser Browser object used for navigation * @param {String} gamename Name of the game to search for - * @returns {Promise} List of URL of possible games obtained from the preliminary research on the F95 portal + * @returns {Promise} List of URL of possible games obtained from the preliminary research on the F95 portal */ module.exports.getSearchGameResults = async function (browser, gamename) { if (shared.debug) console.log("Searching " + gamename + " on F95Zone"); @@ -27,13 +27,15 @@ module.exports.getSearchGameResults = async function (browser, gamename) { }); // Go to the search form and wait for it // Explicitly wait for the required items to load - await page.waitForSelector(selectors.SEARCH_FORM_TEXTBOX); - await page.waitForSelector(selectors.TITLE_ONLY_CHECKBOX); - await page.waitForSelector(selectors.SEARCH_BUTTON); + await Promise.all([ + page.waitForSelector(selectors.SEARCH_FORM_TEXTBOX), + page.waitForSelector(selectors.TITLE_ONLY_CHECKBOX), + page.waitForSelector(selectors.SEARCH_BUTTON) + ]); await page.type(selectors.SEARCH_FORM_TEXTBOX, gamename); // Type the game we desire - await page.click(selectors.TITLE_ONLY_CHECKBOX); // Select only the thread with the game in the titles await Promise.all([ + page.click(selectors.TITLE_ONLY_CHECKBOX), // Select only the thread with the game in the titles page.click(selectors.SEARCH_BUTTON), // Execute search page.waitForNavigation({ waitUntil: shared.WAIT_STATEMENT, diff --git a/app/scripts/urls-helper.js b/app/scripts/urls-helper.js index b5fa93d..48ee295 100644 --- a/app/scripts/urls-helper.js +++ b/app/scripts/urls-helper.js @@ -35,7 +35,7 @@ module.exports.isStringAValidURL = function (url) { }; /** - * @public + * @protected * Check if a particular URL is valid and reachable on the web. 
* @param {String} url URL to check * @param {Boolean} checkRedirect If true, the function will consider redirects a violation and return false @@ -52,9 +52,21 @@ module.exports.urlExists = async function (url, checkRedirect) { if (!valid) return false; if (checkRedirect) { - if (response.url === url) valid = true; + let redirectUrl = await exports.getUrlRedirect(url); + if (redirectUrl === url) valid = true; else valid = false; } return valid; }; + +/** + * @protected + * Check if the URL has a redirect to another page. + * @param {String} url URL to check for redirect + * @returns {Promise} Redirect URL or the passed URL + */ +module.exports.getUrlRedirect = async function(url) { + const response = await ky.head(url); + return response.url; +}