"use strict";

// Public modules from npm
const HTMLParser = require("node-html-parser");
const puppeteer = require("puppeteer");

// Modules from file
const shared = require("./shared.js");
const selectors = require("./constants/css-selectors.js");
const { preparePage } = require("./puppeteer-helper.js");
const GameDownload = require("./classes/game-download.js");
const GameInfo = require("./classes/game-info.js");
const { isStringAValidURL, isF95URL, urlExists } = require("./urls-helper.js");

/**
 * @protected
 * Get information from the game's main page.
 * @param {puppeteer.Browser} browser Browser object used for navigation
 * @param {String} url URL (String) of the game/mod to extract data from
 * @return {Promise<GameInfo>} Complete information about the game you are
 * looking for or null if the URL doesn't exists
 * @throws {Error} If `url` is not recognized by `isF95URL`
 */
module.exports.getGameInfo = async function (browser, url) {
  if (shared.debug) console.log("Obtaining game info");

  // Verify the correctness of the URL
  if (!isF95URL(url)) throw new Error(`${url} is not a valid F95Zone URL`);
  const exists = await urlExists(url);
  if (!exists) return null;

  const page = await preparePage(browser); // Set new isolated page
  try {
    await page.setCookie(...shared.cookies); // Set cookies to avoid login
    await page.goto(url, {
      waitUntil: shared.WAIT_STATEMENT,
    }); // Go to the game page and wait until it loads

    // All the scrapers are independent, so they run concurrently.
    // Promise.all also guarantees that a failure in any of them is
    // observed here instead of becoming an unhandled rejection.
    const info = new GameInfo();
    const [
      name,
      author,
      tags,
      previewSource,
      changelog,
      structuredText,
    ] = await Promise.all([
      getGameTitle(page),
      getGameAuthor(page),
      getGameTags(page),
      getGamePreviewSource(page),
      getLastChangelog(page),
      getMainPostStructuredText(page),
      parsePrefixes(page, info), // Fills status/engine/isMod on `info`
    ]);

    // These two depend on the results gathered above
    const overview = getOverview(structuredText, info.isMod);
    const parsedInfos = parseConversationPage(structuredText);

    // Fill in the GameInfo element with the information obtained
    info.name = name;
    info.author = author;
    info.overview = overview;
    info.tags = tags;
    info.f95url = url;
    info.version = info.isMod
      ? parsedInfos["MOD VERSION"]
      : parsedInfos["VERSION"];
    info.lastUpdate = info.isMod
      ? parsedInfos["UPDATED"]
      : parsedInfos["THREAD UPDATED"];
    info.previewSource = previewSource;
    info.changelog = changelog || "Unknown changelog";

    /* Downloading games without going directly to
     * the platform appears to be prohibited by
     * the guidelines. It is therefore useless to
     * keep the links for downloading the games,
     * so info.downloadInfo is intentionally not filled.
     */

    if (shared.debug) console.log("Founded data for " + info.name);
    return info;
  } finally {
    // Always release the page, even when scraping fails
    await page.close();
  }
};

/**
 * Obtain the game version without parsing again all the data of the game.
 * @param {puppeteer.Browser} browser Browser object used for navigation
 * @param {GameInfo} info Information about the game (its `f95url` is visited)
 * @returns {Promise<String>} Online version of the game, uppercase and
 * without the leading "V"
 */
module.exports.getGameVersionFromTitle = async function (browser, info) {
  const page = await preparePage(browser); // Set new isolated page
  try {
    await page.setCookie(...shared.cookies); // Set cookies to avoid login
    await page.goto(info.f95url, {
      waitUntil: shared.WAIT_STATEMENT,
    }); // Go to the game page and wait until it loads

    // Get the title
    const titleHTML = await page.evaluate(
      /* istanbul ignore next */
      (selector) => document.querySelector(selector).innerHTML,
      selectors.GAME_TITLE
    );
    const title = HTMLParser.parse(titleHTML).childNodes.pop().rawText;

    // The title is in the following format: [PREFIXES] NAME GAME [VERSION] [AUTHOR]
    // The prefixes live in separate nodes, so the first bracket of the
    // last node is the version
    const startIndex = title.indexOf("[") + 1;
    const endIndex = title.indexOf("]", startIndex);
    let version = title.substring(startIndex, endIndex).trim().toUpperCase();
    if (version.startsWith("V")) version = version.replace("V", ""); // Replace only the first occurrence
    return version;
  } finally {
    // The page was never closed before: release it in every case
    await page.close();
  }
};

//#region Private methods
/**
 * @private
 * Get the game description from the structured text of its main post.
 * The overview ends where the update line begins: "Updated" for mods,
 * "Thread Updated" for games.
 * @param {String} text Structured text extracted from the game's web page
 * @param {Boolean} isMod Specify if it is a game or a mod
 * @returns {String} Game description
 */
function getOverview(text, isMod) {
  // Get overview (different parsing for game and mod)
  const endMarker = isMod ? "Updated" : "Thread Updated";
  const markerIndex = text.indexOf(endMarker);

  // Without the marker keep the whole text: substring(0, -1) would
  // silently collapse the overview to an empty string
  const overviewEndIndex = markerIndex === -1 ? text.length : markerIndex;
  return text.substring(0, overviewEndIndex).replace("Overview:\n", "").trim();
}

/**
 * @private
 * Extrapolate the page structure by removing the element tags
 * and leaving only the text and its spacing.
 * @param {puppeteer.Page} page Page containing the text
 * @returns {Promise<String>} Structured text
 */
async function getMainPostStructuredText(page) {
  // Gets the first post, where are listed all the game's informations
  const [post] = await page.$$(selectors.THREAD_POSTS);

  // The info are plain text so we need to parse the HTML code
  const bodyHTML = await page.evaluate(
    /* istanbul ignore next */
    (mainPost) => mainPost.innerHTML,
    post
  );
  return HTMLParser.parse(bodyHTML).structuredText;
}

/**
 * @private
 * Extrapolates and cleans the author from the page passed by parameter.
 * @param {puppeteer.Page} page Page containing the author to be extrapolated
 * @returns {Promise<String>} Game author, or an empty string when the title
 * has no trailing square brackets
 */
async function getGameAuthor(page) {
  // Get the game/mod title
  const titleHTML = await page.evaluate(
    /* istanbul ignore next */
    (selector) => document.querySelector(selector).innerHTML,
    selectors.GAME_TITLE
  );
  const structuredTitle = HTMLParser.parse(titleHTML);

  // The last element **should be** the title without prefixes (engines, status, other...)
  const gameTitle = structuredTitle.childNodes.pop().rawText;

  // The last square brackets contain the author. Look for the closing
  // bracket explicitly instead of assuming it is the very last character
  // (trailing whitespace would otherwise leak "]" into the result)
  const openIndex = gameTitle.lastIndexOf("[");
  const closeIndex = gameTitle.lastIndexOf("]");
  if (openIndex === -1 || closeIndex < openIndex) return "";
  return gameTitle.substring(openIndex + 1, closeIndex).trim();
}

/**
 * @private
 * Process the post text to get all the useful
 * information in the format *DESCRIPTOR : VALUE*.
 * @param {String} text Structured text of the post
 * @returns {Object} Dictionary of information (keys are uppercase)
 */
function parseConversationPage(text) {
  const dataPairs = {};

  // The information searched in the game post are one per line
  for (const line of text.split("\n")) {
    // Split on the FIRST colon only, so values that contain
    // colons themselves (URLs, times...) are not truncated
    const separatorIndex = line.indexOf(":");
    if (separatorIndex === -1) continue;

    // Create pair key/value
    const key = line.substring(0, separatorIndex).trim().toUpperCase(); // Uppercase to avoid mismatch
    const value = line.substring(separatorIndex + 1).trim();

    // Add pair to the dict if valid
    if (value !== "") dataPairs[key] = value;
  }

  return dataPairs;
}

/**
 * @private
 * Gets the URL of the image used as a preview for the game in the conversation.
 * @param {puppeteer.Page} page Page containing the URL to be extrapolated
 * @returns {Promise<String>} URL (String) of the image or null if failed to get it
 */
async function getGamePreviewSource(page) {
  const src = await page.evaluate(
    /* istanbul ignore next */
    (selector) => {
      // Get the first image available
      const img = document.querySelector(selector);
      return img ? img.getAttribute("src") : null;
    },
    selectors.GAME_IMAGES
  );

  // Check if the URL is valid (a missing image/attribute is never valid)
  return src && isStringAValidURL(src) ? src : null;
}
/**
 * @private
 * Extrapolates and cleans the title from the page passed by parameter.
 * @param {puppeteer.Page} page Page containing the title to be extrapolated
 * @returns {Promise<String>} Game title (text preceding the first square bracket)
 */
async function getGameTitle(page) {
  // Get the game/mod name (without square brackets)
  const titleHTML = await page.evaluate(
    /* istanbul ignore next */
    (selector) => document.querySelector(selector).innerHTML,
    selectors.GAME_TITLE
  );
  const structuredTitle = HTMLParser.parse(titleHTML);

  // The last element **should be** the title without prefixes (engines, status, other...)
  const gameTitle = structuredTitle.childNodes.pop().rawText;

  // A title without version/author brackets is already clean:
  // substring(0, -1) would wrongly return an empty string
  const endTitleIndex = gameTitle.indexOf("[");
  if (endTitleIndex === -1) return gameTitle.trim();
  return gameTitle.substring(0, endTitleIndex).trim();
}

/**
 * @private
 * Get the alphabetically sorted list of tags associated with the game.
 * @param {puppeteer.Page} page Page containing the tags to be extrapolated
 * @returns {Promise<String[]>} List of uppercase tags
 */
async function getGameTags(page) {
  // Get the game tags: the elements are independent, read them concurrently
  const handles = await page.$$(selectors.GAME_TAGS);
  const rawTags = await Promise.all(
    handles.map((handle) =>
      page.evaluate(
        /* istanbul ignore next */
        (element) => element.innerText,
        handle
      )
    )
  );
  return rawTags.map((tag) => tag.toUpperCase()).sort();
}
/**
 * @private
 * Process the game title prefixes to extract information such as game status,
 * graphics engine used, and whether it is a mod or original game.
 * @param {puppeteer.Page} page Page containing the prefixes to be extrapolated
 * @param {GameInfo} info Object to assign the identified information to
 * @returns {Promise<GameInfo>} GameInfo object passed in to which the identified information has been added
 */
async function parsePrefixes(page, info) {
  const MOD_PREFIX = "MOD";

  // The 'Ongoing' status is not specified, only 'Abandoned'/'OnHold'/'Complete'
  info.status = "Ongoing";

  // The prefix elements are independent: read their text concurrently.
  // Promise.all keeps the page order, so later prefixes still win.
  const handles = await page.$$(selectors.GAME_TITLE_PREFIXES);
  const rawPrefixes = await Promise.all(
    handles.map((handle) =>
      page.evaluate(
        /* istanbul ignore next */
        (element) => element.innerText,
        handle
      )
    )
  );

  for (const value of rawPrefixes) {
    // Clean the prefix
    const prefix = value.toUpperCase().replace("[", "").replace("]", "").trim();

    // Getting infos...
    if (shared.statuses.includes(prefix)) info.status = prefix;
    else if (shared.engines.includes(prefix)) info.engine = prefix;
    // This is not a game but a mod
    else if (prefix === MOD_PREFIX) info.isMod = true;
  }
  return info;
}

/**
 * @private
 * Get the last changelog available for the game.
 * @param {puppeteer.Page} page Page containing the changelog
 * @returns {Promise<String>} Changelog for the last version or null if no changelog is found
 */
async function getLastChangelog(page) {
  // Gets the first post, where are listed all the game's informations
  const [post] = await page.$$(selectors.THREAD_POSTS);
  // No post means no changelog: return null instead of crashing on `post.$`
  if (!post) return null;

  const spoiler = await post.$(selectors.THREAD_LAST_CHANGELOG);
  if (!spoiler) return null;

  const changelogText = await page.evaluate(
    /* istanbul ignore next */
    (e) => e.innerText,
    spoiler
  );
  const parsedText = HTMLParser.parse(changelogText).structuredText;
  // Only the first "Spoiler" (the header of the spoiler box) is removed
  return parsedText.replace("Spoiler", "").trim();
}
/**
 * @private
 * Get game download links for different platforms.
 * @param {puppeteer.Page} page Page containing the links to be extrapolated
 * @returns {Promise<GameDownload[]>} List of objects used for game download
 */
async function getGameDownloadLink(page) {
  // Most used hosting platforms
  const hostingPlatforms = [
    "MEGA",
    "NOPY",
    "FILESUPLOAD",
    "MIXDROP",
    "UPLOADHAVEN",
    "PIXELDRAIN",
    "FILESFM",
  ];

  // Supported OS platforms
  const platformOS = ["WIN", "LINUX", "MAC", "ALL"];

  // Gets the elements that may contain the download links
  const candidates = await page.$$(selectors.DOWNLOAD_LINKS_CONTAINER);
  if (candidates.length === 0) return [];

  // Look for the container that contains the links
  // It is necessary because the same css selector
  // also identifies other elements on the page
  let container = null;
  for (const candidate of candidates) {
    const innerText = await page.evaluate(
      /* istanbul ignore next */
      (e) => e.innerText,
      candidate
    );
    const upperText = innerText.toUpperCase();

    // Search if the container contains the name of a hosting platform
    if (hostingPlatforms.some((p) => upperText.includes(p))) {
      container = candidate;
      break;
    }
  }
  if (container === null) return [];

  // Extract the HTML text from the container.
  // The lower-case copy is what the extractor searches in.
  const html = await page.evaluate(
    /* istanbul ignore next */
    (e) => e.innerHTML,
    container
  );
  const searchText = html.toLowerCase();

  // Links extracted from the lower-case copy lose their case, which breaks
  // case-sensitive URLs. When lower-casing did not change the length the
  // two strings are index-aligned, so the original spelling can be restored.
  const canRestoreCase = searchText.length === html.length;

  // Parse the download links
  const downloadData = [];
  for (const platform of platformOS) {
    for (const entry of extractGameHostingData(platform, searchText)) {
      if (canRestoreCase && typeof entry.link === "string") {
        const at = searchText.indexOf(entry.link);
        if (at !== -1) entry.link = html.substring(at, at + entry.link.length);
      }
      downloadData.push(entry);
    }
  }
  return downloadData;
}
/**
 * @private
 * From the HTML text it extracts the game download links for the specified operating system.
 * @param {String} platform Name of the operating system to look for a compatible link to.
 * It can only be *WIN/LINUX/MAC/ALL*
 * @param {String} text HTML string to extract links from. The platform name
 * is searched in lower case, so the text is expected to be lower-cased.
 * @returns {GameDownload[]} List of game download links for the selected platform
 */
function extractGameHostingData(platform, text) {
  // NOTE(review): the marker literals and the start-index lookup below were
  // reconstructed from the constant names and from how they are used further
  // down; confirm them against the real page markup.
  const PLATFORM_BOLD_OPEN = "<b>";
  const CONTAINER_SPAN_CLOSE = "</span>";
  const LINK_OPEN = "<a";
  const LINK_CLOSE = "</a>";
  const HREF_START = "href=\""; // presumably double quotes, as innerHTML serializes attributes
  const HREF_END = "\"";
  const TAG_CLOSE = ">";

  // Identify the section of the requested platform
  let startIndex = text.indexOf(platform.toLowerCase());
  if (startIndex === -1) return [];
  startIndex += platform.length;

  // The section ends where the next bold platform label starts...
  let endIndex = text.indexOf(PLATFORM_BOLD_OPEN, startIndex);

  // ...or, for the last platform, at the end of the container.
  // The "not found" test must be done BEFORE adding any offset,
  // otherwise it can never be true.
  if (endIndex === -1) endIndex = text.indexOf(CONTAINER_SPAN_CLOSE, startIndex);
  if (endIndex === -1) endIndex = text.length;

  const section = text.substring(startIndex, endIndex);

  const downloadData = [];
  for (const tag of section.split(LINK_OPEN)) {
    // Ignore non-link string
    if (!tag.includes(HREF_START)) continue;

    // Find the hosting platform name (the text of the link)
    const nameStart = tag.indexOf(TAG_CLOSE) + TAG_CLOSE.length;
    const nameEnd = tag.indexOf(LINK_CLOSE, nameStart);
    if (nameEnd === -1) continue; // Malformed link, no closing tag
    const hosting = tag.substring(nameStart, nameEnd);

    // Find the 'href' attribute
    const hrefStart = tag.indexOf(HREF_START) + HREF_START.length;
    const hrefEnd = tag.indexOf(HREF_END, hrefStart);
    if (hrefEnd === -1) continue; // Malformed attribute, no closing quote
    const link = tag.substring(hrefStart, hrefEnd);

    if (isStringAValidURL(link)) {
      const gd = new GameDownload();
      gd.hosting = hosting.toUpperCase();
      gd.link = link;
      gd.supportedOS = platform.toUpperCase();

      downloadData.push(gd);
    }
  }
  return downloadData;
}

//#endregion Private methods