2020-10-31 15:00:26 +00:00
|
|
|
"use strict";
|
|
|
|
|
|
|
|
// Public modules from npm
|
|
|
|
const cheerio = require("cheerio");
|
|
|
|
|
|
|
|
// Modules from file
|
|
|
|
const { fetchHTML, getUrlRedirect } = require("./network-helper.js");
|
|
|
|
const shared = require("./shared.js");
|
|
|
|
const GameInfo = require("./classes/game-info.js");
|
|
|
|
const f95Selector = require("./constants/css-selector.js");
|
|
|
|
|
|
|
|
/**
 * @protected
 * Get information from the game's main page.
 *
 * The page HTML is obtained with `fetchHTML`, loaded into Cheerio and then
 * scraped piece by piece (title, tags, prefixes, preview image, changelog,
 * JSON-LD). The results are copied into a new `GameInfo` instance.
 * @param {String} url URL of the game/mod to extract data from
 * @return {Promise<GameInfo>} Complete information about the game you are
 * looking for or `null` if is impossible to parse information
 */
module.exports.getGameInfo = async function (url) {
    shared.logger.info("Obtaining game info");

    // Fetch HTML and prepare Cheerio
    const html = await fetchHTML(url);
    const $ = cheerio.load(html);
    const body = $("body");
    const mainPost = $(f95Selector.GS_POSTS).first();

    // Extract data
    const titleData = extractInfoFromTitle(body);
    const tags = extractTags(body);
    const prefixesData = parseGamePrefixes(body);
    const src = extractPreviewSource(body);
    const changelog = extractChangelog(mainPost);
    const structuredData = extractStructuredData(body);

    // Sometimes the JSON-LD are not set, especially in low-profile game.
    // Only its `description` is read below, so a JSON-LD block without a
    // textual description is treated the same way as a missing one.
    if (!structuredData || typeof structuredData.description !== "string") return null;

    const parsedInfos = parseMainPostText(structuredData.description);
    const overview = getOverview(structuredData.description, prefixesData.mod);

    // Obtain the updated URL (done after the early return above, so no
    // redirect lookup is made for pages that cannot be parsed)
    const redirectUrl = await getUrlRedirect(url);

    // Fill in the GameInfo element with the information obtained
    const info = new GameInfo();
    info.id = extractIDFromURL(url);
    info.name = titleData.name;
    info.author = titleData.author;
    info.isMod = prefixesData.mod;
    info.engine = prefixesData.engine;
    info.status = prefixesData.status;
    info.tags = tags;
    info.url = redirectUrl;
    info.language = parsedInfos.Language;
    info.overview = overview;
    info.supportedOS = parsedInfos.SupportedOS;
    info.censored = parsedInfos.Censored;
    info.lastUpdate = parsedInfos.LastUpdate;
    info.previewSrc = src;
    info.changelog = changelog;
    info.version = titleData.version;

    shared.logger.info(`Founded data for ${info.name}`);
    return info;
};
|
|
|
|
|
|
|
|
//#region Private methods
|
|
|
|
/**
 * @private
 * Parse the game prefixes obtaining the engine used,
 * the advancement status and if the game is actually a game or a mod.
 *
 * Every prefix element is classified with `isEngine`, `isStatus` and `isMod`
 * (checked in this order, first hit wins). When several prefixes fall in the
 * same category the last one found is kept.
 * @param {cheerio.Cheerio} body Page `body` selector
 * @returns {Object.<string, object>} Dictionary of values with keys `engine`, `status`, `mod`
 */
function parseGamePrefixes(body) {
    shared.logger.trace("Parsing prefixes...");

    // Local variables
    let mod = false;
    let engine = null;
    let status = null;

    // Obtain the title prefixes
    const prefixElements = body.find(f95Selector.GT_TITLE_PREFIXES);

    prefixElements.each(function parseGamePrefix(idx, el) {
        // Obtain the prefix text and remove the square brackets
        // (same regex used when parsing the title); trimming afterwards also
        // drops spaces that were inside the brackets
        const prefix = cheerio.load(el).text().replace(/[[\]]+/g, "").trim();

        // Check what the prefix indicates
        if (isEngine(prefix)) engine = prefix;
        else if (isStatus(prefix)) status = prefix;
        else if (isMod(prefix)) mod = true;
    });

    // If the status is not set, then the game is in development (Ongoing)
    if (!status) status = "Ongoing";

    return {
        engine,
        status,
        mod
    };
}
|
|
|
|
|
|
|
|
/**
 * @private
 * Extracts all the possible informations from the title.
 *
 * The title is expected in the form `[PREFIXES] TITLE [VERSION] [AUTHOR]`:
 * the last bracketed section is taken as the author, the one before it as the
 * version, and whatever is left outside the brackets as the name.
 * A title without any bracketed section no longer throws: `version` and
 * `author` are simply `null`.
 * @param {cheerio.Cheerio} body Page `body` selector
 * @returns {Object.<string, string>} Dictionary of values with keys `name`, `author`, `version`
 */
function extractInfoFromTitle(body) {
    shared.logger.trace("Extracting information from title...");
    const title = body
        .find(f95Selector.GT_TITLE)
        .text()
        .trim();

    // From the title we can extract: Name, author and version
    // [PREFIXES] TITLE [VERSION] [AUTHOR]
    const bracketPattern = /\[(.*?)\]/g;

    // String.prototype.match returns null (not an empty array) without matches
    const matches = title.match(bracketPattern) || [];

    // The regex [[\]]+ removes the square brackets
    const stripBrackets = (section) => section.replace(/[[\]]+/g, "").trim();

    // Get the title name: everything outside the bracketed sections
    const name = title.replace(bracketPattern, "").trim();

    // The version is the penultimate element.
    // If the matches are less than 2, then the title
    // is malformed and only the author is fetched
    // (usually the author is always present)
    let version = null;
    if (matches.length >= 2) {
        version = stripBrackets(matches[matches.length - 2]);

        // Remove the leading "v"
        if (version[0] === "v") version = version.slice(1);
    }
    else shared.logger.trace(`Malformed title: ${title}`);

    // Last element, when there is one
    const author = matches.length > 0 ? stripBrackets(matches[matches.length - 1]) : null;

    return {
        name,
        version,
        author,
    };
}
|
|
|
|
|
|
|
|
/**
 * @private
 * Gets the tags used to classify the game.
 *
 * Each element matched by the tag selector is converted to its trimmed text.
 * @param {cheerio.Cheerio} body Page `body` selector
 * @returns {String[]} List of tags
 */
function extractTags(body) {
    shared.logger.trace("Extracting tags...");

    // Get the game tags
    const tagResults = body.find(f95Selector.GT_TAGS);

    // `.get()` turns the mapped Cheerio collection into a plain array
    return tagResults
        .map((idx, el) => cheerio.load(el).text().trim())
        .get();
}
|
|
|
|
|
|
|
|
/**
 * @private
 * Gets the URL of the image used as a preview.
 * @param {cheerio.Cheerio} body Page `body` selector
 * @returns {String} URL of the image or `null` if no `data-src` value is found
 */
function extractPreviewSource(body) {
    shared.logger.trace("Extracting image preview source...");
    const image = body.find(f95Selector.GT_IMAGES);

    // The "src" attribute is rendered only in a second moment,
    // we need the "static" src value saved in the attribute "data-src".
    // A selection object is always truthy, even when nothing matched, so the
    // missing case has to be detected on the attribute value itself.
    const source = image.attr("data-src");
    return source || null;
}
|
|
|
|
|
|
|
|
/**
 * @private
 * Gets the changelog of the latest version.
 *
 * The text of the changelog element is cut at the first line that starts
 * with "v" (taken as the header of the previous version), cleaned, and the
 * leading version header of the latest entry is dropped.
 * @param {cheerio.Cheerio} mainPost main post selector
 * @returns {String} Changelog of the last version or `null` if no changelog is fetched
 */
function extractChangelog(mainPost) {
    shared.logger.trace("Extracting last changelog...");

    // Obtain the changelog for ALL the versions
    let changelog = mainPost.find(f95Selector.GT_LAST_CHANGELOG).text().trim();

    // Parse the latest changelog
    // NOTE(review): any line starting with a lowercase "v" ends the entry,
    // not only version headers - confirm this is acceptable for real pages
    const endChangelog = changelog.indexOf("\nv"); // \n followed by version (v)
    if (endChangelog !== -1) changelog = changelog.substring(0, endChangelog + 1);

    // Clean changelog
    changelog = changelog
        .replace("Spoiler", "")
        .replace(/\n+/g, "\n") // Multiple \n
        .trim();

    // Delete the version at the start of the changelog.
    // A single-line changelog has no header line to drop and is kept as it is.
    const firstNewLine = changelog.indexOf("\n");
    if (firstNewLine !== -1 && changelog[0] === "v") {
        changelog = changelog.substring(firstNewLine).trim();
    }

    // Return changelog
    return changelog ? changelog : null;
}
|
|
|
|
|
|
|
|
/**
 * @private
 * Process the main post text to get all the useful
 * information in the format *DESCRIPTOR : VALUE*.
 * Gets "standard" values such as: `Language`, `SupportedOS`, `Censored`, and `LastUpdate`.
 * All non-canonical values are instead grouped together as a dictionary with the key `Various`.
 *
 * Lines are split at the FIRST colon only, so values that contain a colon
 * themselves (URLs, times) are kept whole.
 * @param {String} text Structured text of the post
 * @returns {Object.<string, object>} Dictionary of information
 */
function parseMainPostText(text) {
    shared.logger.trace("Parsing main post raw text...");

    const data = {};

    // The information searched in the game post are one per line.
    // A non-string input is handled as an empty post.
    const lines = typeof text === "string" ? text.split("\n") : [];
    for (const line of lines) {
        const separatorIndex = line.indexOf(":");
        if (separatorIndex === -1) continue;

        // Create pair key/value (key uppercase to avoid mismatch)
        const key = line.slice(0, separatorIndex).trim().toUpperCase().replace(/ /g, "_");
        const value = line.slice(separatorIndex + 1).trim();

        // Add pair to the dict if valid
        if (value !== "") data[key] = value;
    }

    // Parse the standard pairs
    const parsedDict = {};

    // Check if the game is censored: every value different from "no" counts as censored
    if (data.CENSORED) {
        parsedDict["Censored"] = data.CENSORED.toUpperCase() !== "NO";
        delete data.CENSORED;
    }

    // Last update of the main post.
    // NOTE(review): the value is handed to `new Date` as it is; a text that
    // the engine cannot parse produces an Invalid Date - confirm the format
    if (data.UPDATED) {
        parsedDict["LastUpdate"] = new Date(data.UPDATED);
        delete data.UPDATED;
    }
    else if (data.THREAD_UPDATED) {
        parsedDict["LastUpdate"] = new Date(data.THREAD_UPDATED);
        delete data.THREAD_UPDATED;
    }

    // Parse the supported OS
    if (data.OS) {
        // Usually the string is something like "Windows, Linux, Mac"
        parsedDict["SupportedOS"] = data.OS.split(",").map((os) => os.trim());
        delete data.OS;
    }

    // Rename the key for the language
    if (data.LANGUAGE) {
        parsedDict["Language"] = data.LANGUAGE;
        delete data.LANGUAGE;
    }

    // What remains is added to a sub dictionary
    parsedDict["Various"] = data;

    return parsedDict;
}
|
|
|
|
|
|
|
|
/**
 * @private
 * Extracts and processes the JSON-LD values found at the bottom of the page.
 *
 * Only the first JSON-LD object whose `@type` is `Book` is returned.
 * Script tags whose content is not valid JSON are skipped instead of
 * aborting the whole extraction.
 * @param {cheerio.Cheerio} body Page `body` selector
 * @returns {Object.<string, string>} JSON-LD or `null` if no valid JSON is found
 */
function extractStructuredData(body) {
    shared.logger.trace("Extracting JSON-LD data...");
    const structuredDataElements = body.find(f95Selector.GT_JSONLD);
    const candidates = structuredDataElements.map(function parseScriptTag(idx, el) {
        // Get the element HTML
        const html = cheerio.load(el).html().trim();

        // Obtain the JSON-LD
        const data = html
            .replace("<script type=\"application/ld+json\">", "")
            .replace("</script>", "");

        // Convert the string to an object
        let parsed;
        try {
            parsed = JSON.parse(data);
        }
        catch (e) {
            shared.logger.trace(`Skipping invalid JSON-LD: ${e.message}`);
            return undefined;
        }

        // Return only the data of the game
        // (elements mapped to undefined are left out of the result)
        if (parsed && parsed["@type"] === "Book") return parsed;
        return undefined;
    }).get();

    return candidates.length !== 0 ? candidates[0] : null;
}
|
|
|
|
|
|
|
|
/**
 * @private
 * Get the game description from its web page.
 * Different processing depending on whether the game is a mod or not.
 *
 * The overview is the part of the text that precedes the "Updated" (mod) or
 * "Thread Updated" (game) marker. When the marker is missing the whole text
 * is used instead of returning an empty string.
 * @param {String} text Structured text extracted from the game's web page
 * @param {Boolean} mod Specify if it is a game or a mod
 * @returns {String} Game description
 */
function getOverview(text, mod) {
    shared.logger.trace("Extracting game overview...");

    // Get overview (different parsing for game and mod)
    const endMarker = mod ? "Updated" : "Thread Updated";
    const markerIndex = text.indexOf(endMarker);

    // indexOf gives -1 without a marker; substring would clamp that to 0
    // and throw the entire overview away
    const overviewEndIndex = markerIndex === -1 ? text.length : markerIndex;
    return text.substring(0, overviewEndIndex).replace("Overview:\n", "").trim();
}
|
|
|
|
|
|
|
|
/**
 * @private
 * Check if the prefix is a game's engine.
 *
 * The comparison against `shared.engines` is case-insensitive.
 * @param {String} prefix Prefix to check
 * @return {Boolean}
 */
function isEngine(prefix) {
    const knownEngines = toUpperCaseArray(shared.engines);
    return knownEngines.includes(prefix.toUpperCase());
}
|
|
|
|
|
|
|
|
/**
 * @private
 * Check if the prefix is a game's status.
 *
 * The comparison against `shared.statuses` is case-insensitive.
 * @param {String} prefix Prefix to check
 * @return {Boolean}
 */
function isStatus(prefix) {
    const knownStatuses = toUpperCaseArray(shared.statuses);
    return knownStatuses.includes(prefix.toUpperCase());
}
|
|
|
|
|
|
|
|
/**
 * @private
 * Check if the prefix indicates a mod.
 *
 * The prefixes recognised (case-insensitively) are "MOD" and "CHEAT MOD".
 * @param {String} prefix Prefix to check
 * @return {Boolean}
 */
function isMod(prefix) {
    const modPrefixes = ["MOD", "CHEAT MOD"];
    return modPrefixes.includes(prefix.toUpperCase());
}
|
|
|
|
|
2020-11-07 17:16:20 +00:00
|
|
|
/**
 * @private
 * Extracts the game's unique ID from the game's URL.
 *
 * The ID is first searched right after `/threads/` (optionally preceded by
 * the slug and a dot), so numbers that are part of the slug are not mistaken
 * for the ID. URLs without a `/threads/` segment fall back to the first
 * number followed by a boundary and not by a dash.
 * @param {String} url Game's URL
 * @return {Number} Game's ID or `-1` if no ID is found
 */
function extractIDFromURL(url) {
    if (typeof url !== "string") return -1;

    // URL are in the format https://f95zone.to/threads/GAMENAME-VERSION-DEVELOPER.ID/
    // or https://f95zone.to/threads/ID/
    const threadMatch = url.match(/\/threads\/(?:[^/?#]*\.)?([0-9]+)(?=[/?#]|$)/);
    if (threadMatch) return parseInt(threadMatch[1], 10);

    // Fallback for any other kind of URL
    const looseMatch = url.match(/([0-9]+)(?=\/|\b)(?!-)/);
    if (!looseMatch) return -1;

    // Parse and return number
    return parseInt(looseMatch[0], 10);
}
|
|
|
|
|
2020-10-31 15:00:26 +00:00
|
|
|
/**
 * @private
 * Makes an array of strings uppercase.
 *
 * A new array is returned, the input one is left untouched.
 * @param {String[]} a
 * @returns {String[]}
 */
function toUpperCaseArray(a) {
    // `map` already yields an empty array for an empty input
    return a.map((s) => s.toUpperCase());
}
|
|
|
|
//#endregion Private methods
|