Game scraper didn't account for the updated URL; better use of promises

pull/33/head
MillenniumEarl 2020-10-20 22:47:36 +02:00
parent be2e65e665
commit 5293ae3f1b
4 changed files with 50 additions and 29 deletions

View File

@ -480,10 +480,12 @@ async function loginF95(browser, username, password) {
await page.goto(constURLs.F95_LOGIN_URL); // Go to login page
// Explicitly wait for the required items to load
await page.waitForSelector(selectors.USERNAME_INPUT);
await page.waitForSelector(selectors.PASSWORD_INPUT);
await page.waitForSelector(selectors.LOGIN_BUTTON);
await Promise.all([
page.waitForSelector(selectors.USERNAME_INPUT),
page.waitForSelector(selectors.PASSWORD_INPUT),
page.waitForSelector(selectors.LOGIN_BUTTON),
]);
await page.type(selectors.USERNAME_INPUT, username); // Insert username
await page.type(selectors.PASSWORD_INPUT, password); // Insert password
await Promise.all([
@ -549,10 +551,12 @@ async function getUserWatchedGameThreads(browser) {
await page.waitForSelector(selectors.WATCHED_THREAD_FILTER_POPUP_BUTTON);
// Show the popup
await page.click(selectors.WATCHED_THREAD_FILTER_POPUP_BUTTON);
await page.waitForSelector(selectors.UNREAD_THREAD_CHECKBOX);
await page.waitForSelector(selectors.ONLY_GAMES_THREAD_OPTION);
await page.waitForSelector(selectors.FILTER_THREADS_BUTTON);
await Promise.all([
page.click(selectors.WATCHED_THREAD_FILTER_POPUP_BUTTON),
page.waitForSelector(selectors.UNREAD_THREAD_CHECKBOX),
page.waitForSelector(selectors.ONLY_GAMES_THREAD_OPTION),
page.waitForSelector(selectors.FILTER_THREADS_BUTTON),
]);
// Set the filters
await page.evaluate(
@ -560,12 +564,13 @@ async function getUserWatchedGameThreads(browser) {
document.querySelector(selector).removeAttribute("checked"),
selectors.UNREAD_THREAD_CHECKBOX
); // Also read the threads already read
await page.click(selectors.ONLY_GAMES_THREAD_OPTION);
// Filter the threads
await page.click(selectors.FILTER_THREADS_BUTTON);
await page.waitForSelector(selectors.WATCHED_THREAD_URLS);
await Promise.all([
page.click(selectors.ONLY_GAMES_THREAD_OPTION),
page.click(selectors.FILTER_THREADS_BUTTON),
page.waitForSelector(selectors.WATCHED_THREAD_URLS),
]);
// Get the threads urls
const urls = [];

View File

@ -10,7 +10,7 @@ const selectors = require("./constants/css-selectors.js");
const { preparePage } = require("./puppeteer-helper.js");
const GameDownload = require("./classes/game-download.js");
const GameInfo = require("./classes/game-info.js");
const { isStringAValidURL, isF95URL, urlExists } = require("./urls-helper.js");
const urlsHelper = require("./urls-helper.js");
/**
* @protected
@ -24,8 +24,8 @@ module.exports.getGameInfo = async function (browser, url) {
if (shared.debug) console.log("Obtaining game info");
// Verify the correctness of the URL
if (!isF95URL(url)) throw new Error(url + " is not a valid F95Zone URL");
const exists = await urlExists(url);
if (!urlsHelper.isF95URL(url)) throw new Error(url + " is not a valid F95Zone URL");
const exists = await urlsHelper.urlExists(url);
if (!exists) return null;
const page = await preparePage(browser); // Set new isolated page
@ -40,26 +40,28 @@ module.exports.getGameInfo = async function (browser, url) {
const title = getGameTitle(page);
const author = getGameAuthor(page);
const tags = getGameTags(page);
const previewSource = getGamePreviewSource(page);
//let downloadData = getGameDownloadLink(page);
const redirectUrl = urlsHelper.getUrlRedirect(url);
info = await parsePrefixes(page, info); // Fill status/engines/isMod
const structuredText = await getMainPostStructuredText(page);
const overview = getOverview(structuredText, info.isMod);
const parsedInfos = parseConversationPage(structuredText);
const previewSource = getGamePreviewSource(page);
const changelog = getLastChangelog(page);
// Fill in the GameInfo element with the information obtained
info.name = await title;
info.author = await author;
info.overview = overview;
info.tags = await tags;
info.f95url = url;
info.f95url = await redirectUrl;
info.overview = overview;
info.version = info.isMod ? parsedInfos.MOD_VERSION : parsedInfos.VERSION;
info.lastUpdate = info.isMod
? parsedInfos.UPDATED
: parsedInfos.THREAD_UPDATED;
info.previewSource = await previewSource;
info.changelog = (await changelog) || "Unknown changelog";
//let downloadData = getGameDownloadLink(page);
//info.downloadInfo = await downloadData;
/* Downloading games without going directly to
* the platform appears to be prohibited by
@ -207,7 +209,7 @@ async function getGamePreviewSource(page) {
);
// Check if the URL is valid
return isStringAValidURL(src) ? src : null;
return urlsHelper.isStringAValidURL(src) ? src : null;
}
/**
@ -424,7 +426,7 @@ function extractGameHostingData(platform, text) {
endIndex = tag.indexOf(HREF_END, startIndex);
const link = tag.substring(startIndex, endIndex);
if (isStringAValidURL(link)) {
if (urlsHelper.isStringAValidURL(link)) {
const gd = new GameDownload();
gd.hosting = hosting.toUpperCase();
gd.link = link;

View File

@ -15,7 +15,7 @@ const { isF95URL } = require("./urls-helper.js");
* Search the F95Zone portal to find possible conversations regarding the game you are looking for.
* @param {puppeteer.Browser} browser Browser object used for navigation
* @param {String} gamename Name of the game to search for
* @returns {Promise<String[]>} List of URL of possible games obtained from the preliminary research on the F95 portal
* @returns {Promise<String[]>} List of URL of possible games obtained from the preliminary research on the F95 portal
*/
module.exports.getSearchGameResults = async function (browser, gamename) {
if (shared.debug) console.log("Searching " + gamename + " on F95Zone");
@ -27,13 +27,15 @@ module.exports.getSearchGameResults = async function (browser, gamename) {
}); // Go to the search form and wait for it
// Explicitly wait for the required items to load
await page.waitForSelector(selectors.SEARCH_FORM_TEXTBOX);
await page.waitForSelector(selectors.TITLE_ONLY_CHECKBOX);
await page.waitForSelector(selectors.SEARCH_BUTTON);
await Promise.all([
page.waitForSelector(selectors.SEARCH_FORM_TEXTBOX),
page.waitForSelector(selectors.TITLE_ONLY_CHECKBOX),
page.waitForSelector(selectors.SEARCH_BUTTON)
]);
await page.type(selectors.SEARCH_FORM_TEXTBOX, gamename); // Type the game we desire
await page.click(selectors.TITLE_ONLY_CHECKBOX); // Select only the thread with the game in the titles
await Promise.all([
page.click(selectors.TITLE_ONLY_CHECKBOX), // Select only the thread with the game in the titles
page.click(selectors.SEARCH_BUTTON), // Execute search
page.waitForNavigation({
waitUntil: shared.WAIT_STATEMENT,

View File

@ -35,7 +35,7 @@ module.exports.isStringAValidURL = function (url) {
};
/**
* @public
* @protected
* Check if a particular URL is valid and reachable on the web.
* @param {String} url URL to check
* @param {Boolean} checkRedirect If true, the function will consider redirects a violation and return false
@ -52,9 +52,21 @@ module.exports.urlExists = async function (url, checkRedirect) {
if (!valid) return false;
if (checkRedirect) {
if (response.url === url) valid = true;
let redirectUrl = await exports.getUrlRedirect(url);
if (redirectUrl === url) valid = true;
else valid = false;
}
return valid;
};
/**
 * @protected
 * Find out which URL a request to the given address ends up at.
 * Issues a HEAD request through `ky` and reports the `url` field of the response it gets back.
 * @param {String} url URL to check for redirect
 * @returns {Promise<String>} URL reported by the response: the redirect target or, when no redirect happened, the passed URL
 */
module.exports.getUrlRedirect = async function (url) {
  // Only the response's URL matters here, so pull that single field out.
  const { url: resolvedUrl } = await ky.head(url);
  return resolvedUrl;
};