From 662d3c7227fe0a5da5049f3b870b7fad61d10a9e Mon Sep 17 00:00:00 2001 From: MillenniumEarl Date: Mon, 12 Oct 2020 10:27:48 +0200 Subject: [PATCH] Better game search --- app/index.js | 92 ++----------------- app/scripts/constants/css-selectors.js | 4 +- app/scripts/game-searcher.js | 122 +++++++++++++++++++++++++ package-lock.json | 2 +- package.json | 2 +- test/test.js | 2 +- 6 files changed, 136 insertions(+), 88 deletions(-) create mode 100644 app/scripts/game-searcher.js diff --git a/app/index.js b/app/index.js index 1869ae7..fad52e3 100644 --- a/app/index.js +++ b/app/index.js @@ -12,11 +12,14 @@ const { urlExists, isF95URL, } = require("./scripts/urls-helper.js"); -const gameScraper = require("./scripts/game-scraper.js"); +const scraper = require("./scripts/game-scraper.js"); const { prepareBrowser, preparePage, } = require("./scripts/puppeteer-helper.js"); +const searcher = require("./scripts/game-searcher.js"); + +// Classes from file const GameInfo = require("./scripts/classes/game-info.js"); const LoginResult = require("./scripts/classes/login-result.js"); const UserData = require("./scripts/classes/user-data.js"); @@ -222,20 +225,20 @@ module.exports.getGameData = async function (name, includeMods) { if (_browser === null) _browser = await prepareBrowser(); browser = _browser; } - let urlList = await getSearchGameResults(browser, name); + let urlList = await searcher.getSearchGameResults(browser, name); // Process previous partial results let promiseList = []; for (let url of urlList) { // Start looking for information - promiseList.push(gameScraper.getGameInfo(browser, url)); + promiseList.push(scraper.getGameInfo(browser, url)); } // Filter for mods let result = []; for (let info of await Promise.all(promiseList)) { // Skip mods if not required - if(!info) continue; + if (!info) continue; if (info.isMod && !includeMods) continue; else result.push(info); } @@ -269,7 +272,7 @@ module.exports.getGameDataFromURL = async function (url) { } // Get game data 
- let result = await gameScraper.getGameInfo(browser, url); + let result = await scraper.getGameInfo(browser, url); if (shared.isolation) await browser.close(); return result; @@ -594,83 +597,4 @@ async function getUserWatchedGameThreads(browser) { } //#endregion User -//#region Game search -/** - * @private - * Search the F95Zone portal to find possible conversations regarding the game you are looking for. - * @param {puppeteer.Browser} browser Browser object used for navigation - * @param {String} gamename Name of the game to search for - * @returns {Promise} List of URL of possible games obtained from the preliminary research on the F95 portal - */ -async function getSearchGameResults(browser, gamename) { - if (shared.debug) console.log("Searching " + gamename + " on F95Zone"); - - let page = await preparePage(browser); // Set new isolated page - await page.setCookie(...shared.cookies); // Set cookies to avoid login - await page.goto(constURLs.F95_SEARCH_URL, { - waitUntil: shared.WAIT_STATEMENT, - }); // Go to the search form and wait for it - - // Explicitly wait for the required items to load - await page.waitForSelector(selectors.SEARCH_FORM_TEXTBOX); - await page.waitForSelector(selectors.TITLE_ONLY_CHECKBOX); - await page.waitForSelector(selectors.SEARCH_BUTTON); - - await page.type(selectors.SEARCH_FORM_TEXTBOX, gamename); // Type the game we desire - await page.click(selectors.TITLE_ONLY_CHECKBOX); // Select only the thread with the game in the titles - await page.click(selectors.SEARCH_BUTTON); // Execute search - await page.waitForNavigation({ - waitUntil: shared.WAIT_STATEMENT, - }); // Wait for page to load - - // Select all conversation titles - let threadTitleList = await page.$$(selectors.THREAD_TITLE); - - // For each title extract the info about the conversation - if (shared.debug) console.log("Extracting info from conversation titles"); - let results = []; - for (let title of threadTitleList) { - let gameUrl = await getOnlyGameThreads(page, 
title); - - // Append the game's informations - if (gameUrl !== null) results.push(gameUrl); - } - if (shared.debug) console.log("Find " + results.length + " conversations"); - await page.close(); // Close the page - - return results; -} -/** - * @private - * Return the link of a conversation if it is a game or a mod - * @param {puppeteer.Page} page Page containing the conversation to be analyzed - * @param {puppeteer.ElementHandle} titleHandle Title of the conversation to be analyzed - * @return {Promise} URL of the game/mod - */ -async function getOnlyGameThreads(page, titleHandle) { - const GAME_RECOMMENDATION_PREFIX = "RECOMMENDATION"; - - // Get the URL of the thread from the title - let relativeURLThread = await page.evaluate( - /* istanbul ignore next */ (element) => element.querySelector("a").href, - titleHandle - ); - let url = new URL(relativeURLThread, constURLs.F95_BASE_URL).toString(); - - // Parse prefixes to ignore game recommendation - for (let element of await titleHandle.$$('span[dir="auto"]')) { - // Elaborate the prefixes - let prefix = await page.evaluate( - /* istanbul ignore next */ (element) => element.textContent.toUpperCase(), - element - ); - prefix = prefix.replace("[", "").replace("]", ""); - - // This is not a game nor a mod, we can exit - if (prefix === GAME_RECOMMENDATION_PREFIX) return null; - } - return url; -} -//#endregion Game search - //#endregion Private methods diff --git a/app/scripts/constants/css-selectors.js b/app/scripts/constants/css-selectors.js index 0fcc04a..d0ca9cd 100644 --- a/app/scripts/constants/css-selectors.js +++ b/app/scripts/constants/css-selectors.js @@ -23,5 +23,7 @@ module.exports = Object.freeze({ WATCHED_THREAD_FILTER_POPUP_BUTTON: 'a.filterBar-menuTrigger', WATCHED_THREAD_NEXT_PAGE: 'a.pageNav-jump--next', WATCHED_THREAD_URLS: 'a[href^="/threads/"][data-tp-primary]', - DOWNLOAD_LINKS_CONTAINER: 'span[style="font-size: 18px"]' + DOWNLOAD_LINKS_CONTAINER: 'span[style="font-size: 18px"]', + 
SEARCH_THREADS_RESULTS_BODY: "div.contentRow-main", + SEARCH_THREADS_MEMBERSHIP: "li > a:not(.username)" }); \ No newline at end of file diff --git a/app/scripts/game-searcher.js b/app/scripts/game-searcher.js new file mode 100644 index 0000000..ea6b3bd --- /dev/null +++ b/app/scripts/game-searcher.js @@ -0,0 +1,122 @@ +"use strict"; + +// Public modules from npm +const puppeteer = require('puppeteer'); + +// Modules from file +const shared = require("./shared.js"); +const constURLs = require("./constants/urls.js"); +const selectors = require("./constants/css-selectors.js"); +const { + preparePage, +} = require("./puppeteer-helper.js"); + +/** + * @protected + * Search the F95Zone portal to find possible conversations regarding the game you are looking for. + * @param {puppeteer.Browser} browser Browser object used for navigation + * @param {String} gamename Name of the game to search for + * @returns {Promise} List of URL of possible games obtained from the preliminary research on the F95 portal + */ +module.exports.getSearchGameResults = async function(browser, gamename) { + if (shared.debug) console.log("Searching " + gamename + " on F95Zone"); + + let page = await preparePage(browser); // Set new isolated page + await page.setCookie(...shared.cookies); // Set cookies to avoid login + await page.goto(constURLs.F95_SEARCH_URL, { + waitUntil: shared.WAIT_STATEMENT, + }); // Go to the search form and wait for it + + // Explicitly wait for the required items to load + await page.waitForSelector(selectors.SEARCH_FORM_TEXTBOX); + await page.waitForSelector(selectors.TITLE_ONLY_CHECKBOX); + await page.waitForSelector(selectors.SEARCH_BUTTON); + + await page.type(selectors.SEARCH_FORM_TEXTBOX, gamename); // Type the game we desire + await page.click(selectors.TITLE_ONLY_CHECKBOX); // Select only the thread with the game in the titles + await page.click(selectors.SEARCH_BUTTON); // Execute search + await page.waitForNavigation({ + waitUntil: shared.WAIT_STATEMENT, + }); 
// Wait for page to load + + // Select all search result bodies + let resultsThread = await page.$$(selectors.SEARCH_THREADS_RESULTS_BODY); + + // For each element found extract the info about the conversation + if (shared.debug) console.log("Extracting info from conversations"); + let results = []; + for (let element of resultsThread) { + let gameUrl = await getOnlyGameThreads(page, element); + if (gameUrl !== null) results.push(gameUrl); + } + if (shared.debug) console.log("Find " + results.length + " conversations"); + await page.close(); // Close the page + + return results; +} + +//#region Private methods +/** + * @private + * Return the link of a conversation if it is a game or a mod. + * @param {puppeteer.Page} page Page containing the conversation to be analyzed + * @param {puppeteer.ElementHandle} divHandle Element of the conversation to be analyzed + * @return {Promise} URL of the game/mod or null if the URL is not of a game + */ +async function getOnlyGameThreads(page, divHandle) { + // Obtain the elements containing the basic information + let titleHandle = await divHandle.$(selectors.THREAD_TITLE); + let forumHandle = await divHandle.$(selectors.SEARCH_THREADS_MEMBERSHIP); + + // Get the forum where the thread was posted + let forum = await getMembershipForum(page, forumHandle); + if(forum !== "GAMES") return null; + + // Get the URL of the thread from the title + return await getThreadURL(page, titleHandle); +} + +/** + * @private + * Obtain the membership forum of the thread passed through "handle". 
+ * @param {puppeteer.Page} page Page containing the conversation to be analyzed + * @param {puppeteer.ElementHandle} handle Handle containing the forum membership + * @returns {Promise} Uppercase membership category + */ +async function getMembershipForum(page, handle) { + // The link can be something like: + // + /forums/request.NUMBER/ + // + /forums/game-recommendations-identification.NUMBER/ + // + /forums/games.NUMBER/ <-- We need this + + let link = await page.evaluate( + /* istanbul ignore next */ + (e) => e.getAttribute('href'), + handle + ); + + // Parse link + link = link.replace("/forums/", ""); + let endIndex = link.indexOf("."); + let forum = link.substring(0, endIndex); + + return forum.toUpperCase(); +} + +/** + * @private + * Obtain the URL of the thread passed through "handle". + * @param {puppeteer.Page} page Page containing the conversation to be analyzed + * @param {puppeteer.ElementHandle} handle Handle containing the thread title + * @returns {Promise} URL of the thread + */ +async function getThreadURL(page, handle) { + let relativeURLThread = await page.evaluate( + /* istanbul ignore next */ + (e) => e.querySelector("a").href, + handle + ); + let urlThread = new URL(relativeURLThread, constURLs.F95_BASE_URL).toString(); + return urlThread; +} +//#endregion Private methods diff --git a/package-lock.json b/package-lock.json index 0c090b9..17bf550 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "f95api", - "version": "1.0.2", + "version": "1.1.2", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/package.json b/package.json index 9e9c7b0..5144a3e 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "main": "./app/index.js", "name": "f95api", - "version": "1.0.2", + "version": "1.1.2", "author": { "name": "Millennium Earl" }, diff --git a/test/test.js b/test/test.js index bbc09ad..75f684d 100644 --- a/test/test.js +++ b/test/test.js @@ -19,7 +19,7 @@ async function main() { if 
(loginResult.success) { await loadF95BaseData(); - let gameData = await getGameData("champion", false); + let gameData = await getGameData("detective girl of the steam city", false); console.log(gameData); // let userData = await getUserData();