468 lines
15 KiB
JavaScript
468 lines
15 KiB
JavaScript
"use strict";
|
|
|
|
// Public modules from npm
|
|
const HTMLParser = require("node-html-parser");
|
|
const puppeteer = require("puppeteer"); // skipcq: JS-0128
|
|
|
|
// Modules from file
|
|
const shared = require("./shared.js");
|
|
const selectorK = require("./constants/css-selector.js");
|
|
const { preparePage } = require("./puppeteer-helper.js");
|
|
const GameDownload = require("./classes/game-download.js");
|
|
const GameInfo = require("./classes/game-info.js");
|
|
const urlHelper = require("./url-helper.js");
|
|
|
|
/**
 * @protected
 * Get information from the game's main page.
 *
 * The URL is validated first, then a dedicated page is opened, the thread is
 * loaded and every piece of information is extracted from it. The page is
 * always closed before returning, even when the extraction fails.
 * @param {puppeteer.Browser} browser Browser object used for navigation
 * @param {String} url URL (String) of the game/mod to extract data from
 * @return {Promise<GameInfo>} Complete information about the game you are
 * looking for
 * @throws {URIError} If `urlHelper.urlExists` reports that the URL does not exist
 * @throws {Error} If `urlHelper.isF95URL` rejects the URL
 */
module.exports.getGameInfo = async function (browser, url) {
  shared.logger.info("Obtaining game info");

  // Verify the correctness of the URL
  const exists = await urlHelper.urlExists(url);
  if (!exists) throw new URIError(`${url} is not a valid URL`);
  if (!urlHelper.isF95URL(url)) {
    throw new Error(`${url} is not a valid F95Zone URL`);
  }

  const page = await preparePage(browser); // Set new isolated page
  let info;
  try {
    await page.setCookie(...shared.cookies); // Set cookies to avoid login
    await page.goto(url, {
      waitUntil: shared.WAIT_STATEMENT,
    }); // Go to the game page and wait until it loads

    // Every extraction below is independent from the others, so they are
    // started together and awaited as a group: a failure in any of them is
    // reported through this single await instead of being left unhandled.
    const [
      prefixedInfo, // GameInfo with status/engine/isMod filled in
      title,
      author,
      tags,
      redirectUrl,
      structuredText,
      previewSource,
      changelog,
    ] = await Promise.all([
      parsePrefixes(page, new GameInfo()),
      getGameTitle(page),
      getGameAuthor(page),
      getGameTags(page),
      urlHelper.getUrlRedirect(url),
      getMainPostStructuredText(page),
      getGamePreviewSource(page),
      getLastChangelog(page),
    ]);
    info = prefixedInfo;

    // These two need `isMod`/the structured text, hence they run afterwards
    const overview = getOverview(structuredText, info.isMod);
    const parsedInfos = parseConversationPage(structuredText);

    // Fill in the GameInfo element with the information obtained
    info.name = title;
    info.author = author;
    info.tags = tags;
    info.f95url = redirectUrl;
    info.overview = overview;
    info.lastUpdate = info.isMod
      ? parsedInfos.UPDATED
      : parsedInfos.THREAD_UPDATED;
    info.previewSource = previewSource;
    info.changelog = changelog;
    // Requires `info.f95url`, so it must come after the assignments above
    info.version = await exports.getGameVersionFromTitle(browser, info);

    /* Downloading games without going directly to
     * the platform appears to be prohibited by
     * the guidelines. It is therefore useless to
     * keep the links for downloading the games. */
  } finally {
    await page.close(); // Close the page, also when something went wrong
  }

  shared.logger.info("Founded data for " + info.name);
  return info;
};
|
|
|
|
/**
 * Obtain the game version without parsing again all the data of the game.
 *
 * Opens a new page on `info.f95url`, reads the thread title and returns the
 * content of its first pair of square brackets, upper-cased, without a
 * leading "V" and without the characters removed by `cleanFSString`.
 * The page is closed even if navigation or evaluation fails.
 * @param {puppeteer.Browser} browser Browser object used for navigation
 * @param {GameInfo} info Information about the game (only `f95url` is read)
 * @returns {Promise<String>} Online version of the game, or an empty string
 * when the title does not contain a complete `[...]` section
 */
module.exports.getGameVersionFromTitle = async function (browser, info) {
  const page = await preparePage(browser); // Set new isolated page
  let titleHTML;
  try {
    await page.setCookie(...shared.cookies); // Set cookies to avoid login
    await page.goto(info.f95url, {
      waitUntil: shared.WAIT_STATEMENT,
    }); // Go to the game page and wait until it loads

    // Get the title
    titleHTML = await page.evaluate(
      /* istanbul ignore next */
      (selector) => document.querySelector(selector).innerHTML,
      selectorK.GAME_TITLE
    );
  } finally {
    await page.close();
  }

  // The last child node is the text that follows the prefix elements
  const title = HTMLParser.parse(titleHTML).childNodes.pop().rawText;

  // The title is in the following format: [PREFIXES] NAME GAME [VERSION] [AUTHOR]
  const openIndex = title.indexOf("[");
  const closeIndex = openIndex === -1 ? -1 : title.indexOf("]", openIndex + 1);
  // Without a complete pair of brackets there is no version to report.
  // (substring() with a -1 bound would silently return unrelated text.)
  if (closeIndex === -1) return "";

  let version = title.substring(openIndex + 1, closeIndex).trim().toUpperCase();
  if (version.startsWith("V")) version = version.replace("V", ""); // Replace only the first occurrence
  return cleanFSString(version);
};
|
|
|
|
//#region Private methods
|
|
/**
 * Strip from a string every character matched by the pattern below:
 * `/`, `\`, `?`, `%`, `*`, `:`, `|`, `"`, `<` and `>`.
 * @param {String} s String to clean
 * @returns {String} Copy of `s` without the listed characters
 */
function cleanFSString(s) {
  const forbiddenChar = /[/\\?%*:|"<>]/;
  // Splitting on each forbidden character and re-joining drops all of them
  return s.split(forbiddenChar).join("");
}
|
|
|
|
/**
 * @private
 * Get the game description from the structured text of its web page.
 * Different processing depending on whether the game is a mod or not:
 * the description ends at the first "Updated" for a mod and at the first
 * "Thread Updated" for a game. If that marker is missing the whole text
 * is treated as the description.
 * @param {String} text Structured text extracted from the game's web page
 * @param {Boolean} isMod Specify if it is a game or a mod
 * @returns {String} Game description, without the "Overview:" heading
 */
function getOverview(text, isMod) {
  // Get overview (different parsing for game and mod)
  const endMarker = isMod ? "Updated" : "Thread Updated";
  const markerIndex = text.indexOf(endMarker);

  // substring(0, -1) returns "", so a missing marker must be handled explicitly
  const overviewEndIndex = markerIndex === -1 ? text.length : markerIndex;
  return text.substring(0, overviewEndIndex).replace("Overview:\n", "").trim();
}
|
|
|
|
/**
 * @private
 * Read the inner HTML of the first element matching
 * `selectorK.THREAD_POSTS` and convert it to structured text with
 * `HTMLParser`, i.e. the text of the post without its element tags.
 * @param {puppeteer.Page} page Page containing the text
 * @returns {Promise<String>} Structured text
 */
async function getMainPostStructuredText(page) {
  // Only the first post is needed: it lists all the game's information
  const [firstPost] = await page.$$(selectorK.THREAD_POSTS);

  // The information is plain text, so the HTML of the post has to be parsed
  const readInnerHTML =
    /* istanbul ignore next */
    (mainPost) => mainPost.innerHTML;
  const postHTML = await page.evaluate(readInnerHTML, firstPost);

  const parsedPost = HTMLParser.parse(postHTML);
  return parsedPost.structuredText;
}
|
|
|
|
/**
 * @private
 * Extrapolates and cleans the author from the page passed by parameter.
 * The author is the content of the last pair of square brackets of the title.
 * @param {puppeteer.Page} page Page containing the author to be extrapolated
 * @returns {Promise<String>} Game author, or an empty string when the title
 * has no square brackets
 */
async function getGameAuthor(page) {
  // Get the HTML of the title element
  const titleHTML = await page.evaluate(
    /* istanbul ignore next */
    (selector) => document.querySelector(selector).innerHTML,
    selectorK.GAME_TITLE
  );
  const structuredTitle = HTMLParser.parse(titleHTML);

  // The last element **should be** the title without prefixes (engines, status, other...)
  const gameTitle = structuredTitle.childNodes.pop().rawText.trim();

  // The last square brackets contain the author
  const openIndex = gameTitle.lastIndexOf("[");
  if (openIndex === -1) return ""; // No brackets: nothing identifies an author

  // Locate the matching "]" instead of assuming it is the very last character
  const closeIndex = gameTitle.indexOf("]", openIndex);
  const endIndex = closeIndex === -1 ? gameTitle.length : closeIndex;
  return gameTitle.substring(openIndex + 1, endIndex).trim();
}
|
|
|
|
/**
 * @private
 * Process the post text to get all the useful
 * information in the format *DESCRIPTOR : VALUE*.
 *
 * Each line is split at its FIRST colon: the left part becomes the key
 * (trimmed, upper-cased, spaces replaced by underscores) and everything
 * after that colon becomes the value, so values such as URLs or times
 * keep their own colons. Lines without a colon or with an empty value
 * are ignored.
 * @param {String} text Structured text of the post
 * @returns {Object} Dictionary of information
 */
function parseConversationPage(text) {
  const dataPairs = {};

  // The information searched in the game post are one per line
  for (const line of text.split("\n")) {
    const separatorIndex = line.indexOf(":");
    if (separatorIndex === -1) continue;

    // Create pair key/value
    const key = line
      .substring(0, separatorIndex)
      .trim()
      .toUpperCase() // Uppercase to avoid mismatch
      .replace(/ /g, "_");
    const value = line.substring(separatorIndex + 1).trim();

    // Add pair to the dict if valid
    if (value !== "") dataPairs[key] = value;
  }

  return dataPairs;
}
|
|
|
|
/**
 * @private
 * Gets the URL of the image used as a preview for the game in the conversation.
 * The `src` attribute of the first element matching `selectorK.GAME_IMAGES`
 * is returned only when `urlHelper.isStringAValidURL` accepts it.
 * @param {puppeteer.Page} page Page containing the URL to be extrapolated
 * @returns {Promise<String>} URL (String) of the image or null if failed to get it
 */
async function getGamePreviewSource(page) {
  const imageSelector = selectorK.GAME_IMAGES;

  // If the selector never shows up the wait rejects: report "no preview"
  try {
    await page.waitForSelector(imageSelector);
  } catch {
    return null;
  }

  const firstImageSrc = await page.evaluate(
    /* istanbul ignore next */
    (selector) => {
      // Only the first available image is considered
      const firstImage = document.querySelector(selector);
      return firstImage ? firstImage.getAttribute("src") : null;
    },
    imageSelector
  );

  // Discard anything that is not a valid URL
  if (!urlHelper.isStringAValidURL(firstImageSrc)) return null;
  return firstImageSrc;
}
|
|
|
|
/**
 * @private
 * Extrapolates and cleans the title from the page passed by parameter.
 * The title is the text that precedes the first square bracket; when the
 * text contains no square bracket it is returned entirely.
 * @param {puppeteer.Page} page Page containing the title to be extrapolated
 * @returns {Promise<String>} Game title
 */
async function getGameTitle(page) {
  // Get the game/mod name (without square brackets)
  const titleHTML = await page.evaluate(
    /* istanbul ignore next */
    (selector) => document.querySelector(selector).innerHTML,
    selectorK.GAME_TITLE
  );
  const structuredTitle = HTMLParser.parse(titleHTML);

  // The last element **should be** the title without prefixes (engines, status, other...)
  const gameTitle = structuredTitle.childNodes.pop().rawText;

  // substring(0, -1) would return "", so a title without "[" is kept whole
  const bracketIndex = gameTitle.indexOf("[");
  const endTitleIndex = bracketIndex === -1 ? gameTitle.length : bracketIndex;
  return gameTitle.substring(0, endTitleIndex).trim();
}
|
|
|
|
/**
 * @private
 * Collect the inner text of every element matching `selectorK.GAME_TAGS`,
 * upper-case it and return the resulting list sorted with the default
 * `Array.prototype.sort` ordering.
 * @param {puppeteer.Page} page Page containing the tags to be extrapolated
 * @returns {Promise<String[]>} List of uppercase tags
 */
async function getGameTags(page) {
  const readInnerText =
    /* istanbul ignore next */
    (element) => element.innerText;

  const tagHandles = await page.$$(selectorK.GAME_TAGS);
  const upperTags = [];

  // One evaluation per tag element, in document order
  for (const tagHandle of tagHandles) {
    const tagText = await page.evaluate(readInnerText, tagHandle);
    upperTags.push(tagText.toUpperCase());
  }

  upperTags.sort();
  return upperTags;
}
|
|
|
|
/**
 * @private
 * Read the prefixes of the game title (elements matching
 * `selectorK.GAME_TITLE_PREFIXES`) and store what they describe in `info`:
 * the status when the prefix is listed in `shared.statuses`, the engine when
 * it is listed in `shared.engines`, and the mod flag for "MOD"/"CHEAT MOD".
 * @param {puppeteer.Page} page Page containing the prefixes to be extrapolated
 * @param {GameInfo} info Object to assign the identified information to
 * @returns {Promise<GameInfo>} The same `info` object, updated in place
 */
async function parsePrefixes(page, info) {
  // 'Ongoing' has no prefix of its own (only 'Abandoned'/'OnHold'/'Complete'),
  // so it is the value used until a status prefix is found
  info.status = "ONGOING";

  const readInnerText =
    /* istanbul ignore next */
    (element) => element.innerText;

  const prefixHandles = await page.$$(selectorK.GAME_TITLE_PREFIXES);
  for (const prefixHandle of prefixHandles) {
    const rawPrefix = await page.evaluate(readInnerText, prefixHandle);

    // Normalise: upper case, without the first "[" and "]", no outer spaces
    const prefix = rawPrefix
      .toUpperCase()
      .replace("[", "")
      .replace("]", "")
      .trim();

    if (shared.statuses.includes(prefix)) {
      info.status = prefix;
      continue;
    }
    if (shared.engines.includes(prefix)) {
      info.engine = prefix;
      continue;
    }
    // This is not a game but a mod
    if (prefix === "MOD" || prefix === "CHEAT MOD") {
      info.isMod = true;
    }
  }

  return info;
}
|
|
|
|
/**
 * @private
 * Get the last changelog available for the game.
 * The changelog is looked up inside the first element matching
 * `selectorK.THREAD_POSTS` through `selectorK.THREAD_LAST_CHANGELOG`;
 * a leading "Spoiler" label and a leading ":" are stripped from its text.
 * @param {puppeteer.Page} page Page containing the changelog
 * @returns {Promise<String>} Changelog for the last version or a empty string if no changelog is found
 */
async function getLastChangelog(page) {
  // The first post is the one listing all the game's information
  const [firstPost] = await page.$$(selectorK.THREAD_POSTS);

  const spoilerHandle = await firstPost.$(selectorK.THREAD_LAST_CHANGELOG);
  if (!spoilerHandle) return "";

  const spoilerText = await page.evaluate(
    /* istanbul ignore next */
    (e) => e.innerText,
    spoilerHandle
  );
  let changelog = HTMLParser.parse(spoilerText).structuredText;

  // Clean the text: drop the leading label and the colon that may follow it
  const SPOILER_LABEL = "Spoiler";
  if (changelog.startsWith(SPOILER_LABEL)) {
    changelog = changelog.slice(SPOILER_LABEL.length);
  }
  if (changelog.startsWith(":")) {
    changelog = changelog.slice(1);
  }

  return changelog.trim();
}
|
|
|
|
/**
 * @private
 * Get game download links for different platforms.
 *
 * Among the elements matching `selectorK.DOWNLOAD_LINKS_CONTAINER`, the first
 * one whose text mentions a known hosting platform is taken as the links
 * container; its lower-cased HTML is then handed to `extractGameHostingData`
 * once for every supported operating system.
 * @param {puppeteer.Page} page Page containing the links to be extrapolated
 * @returns {Promise<GameDownload[]>} List of objects used for game download
 * @deprecated
 */
/* istanbul ignore next */
// skipcq: JS-0128
async function getGameDownloadLink(page) {
  // Most used hosting platforms
  const hostingPlatforms = [
    "MEGA",
    "NOPY",
    "FILESUPLOAD",
    "MIXDROP",
    "UPLOADHAVEN",
    "PIXELDRAIN",
    "FILESFM",
  ];

  // Supported OS platforms
  const platformOS = ["WIN", "LINUX", "MAC", "ALL"];

  // Gets the <span> elements which may contain the download links
  const candidates = await page.$$(selectorK.DOWNLOAD_LINKS_CONTAINER);
  if (candidates.length === 0) return [];

  // The same css selector also identifies other elements on the page,
  // so the real container is the first candidate naming a hosting platform
  let container = null;
  for (const candidate of candidates) {
    const candidateText = await page.evaluate(
      /* istanbul ignore next */
      (e) => e.innerText,
      candidate
    );
    const upperText = candidateText.toUpperCase();

    if (hostingPlatforms.some((p) => upperText.includes(p))) {
      container = candidate;
      break;
    }
  }
  if (container === null) return [];

  // Extract the HTML text from the container
  // NOTE(review): lower-casing the markup also lower-cases the URLs inside it
  const containerHTML = await page.evaluate(
    /* istanbul ignore next */
    (e) => e.innerHTML,
    container
  );
  const searchText = containerHTML.toLowerCase();

  // Parse the download links, one operating system at a time
  return platformOS.reduce(
    (links, platform) =>
      links.concat(extractGameHostingData(platform, searchText)),
    []
  );
}
|
|
|
|
/**
 * @private
 * From the HTML text it extracts the game download links for the specified operating system.
 *
 * The section of a platform starts right after the first occurrence of its
 * (lower-cased) name and ends at the next `<b>` tag; when there is no
 * further `<b>` it ends after the next `</span>`, or at the end of the text
 * if that is missing too. Every `<a>` found in the section whose `href`
 * (single or double quoted) is accepted by `urlHelper.isStringAValidURL`
 * produces one `GameDownload`.
 * @param {String} platform Name of the operating system to look for a compatible link to.
 * It can only be *WIN/LINUX/MAC/ALL*
 * @param {String} text HTML string to extract links from
 * @returns {GameDownload[]} List of game download links for the selected platform
 * @deprecated
 */
/* istanbul ignore next */
function extractGameHostingData(platform, text) {
  const PLATFORM_BOLD_OPEN = "<b>";
  const CONTAINER_SPAN_CLOSE = "</span>";
  const LINK_OPEN = "<a";
  const LINK_CLOSE = "</a>";
  const TAG_CLOSE = ">";
  // Group 2 is the attribute value; \1 requires the same closing quote
  const HREF_PATTERN = /href=(["'])(.*?)\1/;

  // Identify the individual platforms
  const platformIndex = text.indexOf(platform.toLowerCase());
  if (platformIndex === -1) return [];
  const sectionStart = platformIndex + platform.length;

  // The section ends where the next <b>platform</b> begins...
  let sectionEnd = text.indexOf(PLATFORM_BOLD_OPEN, sectionStart);
  if (sectionEnd === -1) {
    // ...or, for the last platform, at the end of the container.
    // The "not found" test must run on the raw indexOf() result: adding the
    // marker length first would hide the -1.
    const spanCloseIndex = text.indexOf(CONTAINER_SPAN_CLOSE, sectionStart);
    sectionEnd =
      spanCloseIndex === -1
        ? text.length
        : spanCloseIndex + CONTAINER_SPAN_CLOSE.length;
  }
  const section = text.substring(sectionStart, sectionEnd);

  const downloadData = [];
  for (const tag of section.split(LINK_OPEN)) {
    // Ignore non-link string
    const hrefMatch = HREF_PATTERN.exec(tag);
    if (hrefMatch === null) continue;
    const link = hrefMatch[2];

    // Find the hosting platform name (the text between ">" and "</a>")
    const nameStart = tag.indexOf(TAG_CLOSE) + TAG_CLOSE.length;
    const nameEnd = tag.indexOf(LINK_CLOSE, nameStart);
    const hosting = tag.substring(
      nameStart,
      nameEnd === -1 ? tag.length : nameEnd
    );

    if (urlHelper.isStringAValidURL(link)) {
      const gd = new GameDownload();
      gd.hosting = hosting.toUpperCase();
      gd.link = link;
      gd.supportedOS = platform.toUpperCase();

      downloadData.push(gd);
    }
  }
  return downloadData;
}
|
|
|
|
//#endregion Private methods
|