Better logging, completed game scraping, remove unused values

pull/44/head
MillenniumEarl 2020-11-01 21:56:12 +01:00
parent ecfd1784b1
commit 1391f28ad1
7 changed files with 414 additions and 284 deletions

View File

@ -24,9 +24,25 @@ class GameInfo {
*/ */
this.overview = null; this.overview = null;
/** /**
* List of tags associated with the game * Game language.
* @type String[] * @type String
*/ */
this.language = null;
/**
* List of supported OS.
* @type String[]
*/
this.supportedOS = [];
/**
* Specify whether the game has censorship
* measures regarding NSFW scenes.
* @type Boolean
*/
this.censored = null;
/**
* List of tags associated with the game
* @type String[]
*/
this.tags = []; this.tags = [];
/** /**
* Graphics engine used for game development * Graphics engine used for game development
@ -86,6 +102,9 @@ class GameInfo {
author: this.author, author: this.author,
url: this.url, url: this.url,
overview: this.overview, overview: this.overview,
language: this.language,
supportedOS: this.supportedOS,
censored: this.censored,
engine: this.engine, engine: this.engine,
status: this.status, status: this.status,
previewSrc: this.previewSrc, previewSrc: this.previewSrc,

View File

@ -1,31 +1,28 @@
module.exports = Object.freeze({ module.exports = Object.freeze({
AVATAR_INFO: "span.avatar", BD_ENGINE_ID_SELECTOR: "div[id^=\"btn-prefix_1_\"]>span",
AVATAR_PIC: "a[href=\"/account/\"] > span.avatar > img[class^=\"avatar\"]", BD_STATUS_ID_SELECTOR: "div[id^=\"btn-prefix_4_\"]>span",
ENGINE_ID_SELECTOR: "div[id^=\"btn-prefix_1_\"]>span", GT_IMAGES: "img:not([title])[data-src^=\"https://attachments.f95zone.to\"][data-url=\"\"]",
FILTER_THREADS_BUTTON: "button[class=\"button--primary button\"]",
GT_IMAGES: "img[src^=\"https://attachments.f95zone.to\"]",
GT_TAGS: "a.tagItem", GT_TAGS: "a.tagItem",
GT_TITLE: "h1.p-title-value", GT_TITLE: "h1.p-title-value",
GT_TITLE_PREFIXES: "h1.p-title-value > a.labelLink > span[dir=\"auto\"]", GT_TITLE_PREFIXES: "h1.p-title-value > a.labelLink > span[dir=\"auto\"]",
LOGIN_BUTTON: "button.button--icon--login", GT_LAST_CHANGELOG: "div.bbCodeBlock-content > div:first-of-type",
LOGIN_MESSAGE_ERROR: "div.blockMessage.blockMessage--error.blockMessage--iconic", GT_JSONLD: "script[type=\"application/ld+json\"]",
ONLY_GAMES_THREAD_OPTION: "select[name=\"nodes[]\"] > option[value=\"2\"]",
PASSWORD_INPUT: "input[name=\"password\"]",
SEARCH_BUTTON: "form.block > * button.button--icon--search",
SEARCH_FORM_TEXTBOX: "input[name=\"keywords\"][type=\"search\"]",
SEARCH_ONLY_GAMES_OPTION: "select[name=\"c[nodes][]\"] > option[value=\"1\"]",
STATUS_ID_SELECTOR: "div[id^=\"btn-prefix_4_\"]>span",
GS_POSTS: "article.message-body:first-child > div.bbWrapper:first-of-type",
GS_RESULT_THREAD_TITLE: "h3.contentRow-title > a",
TITLE_ONLY_CHECKBOX: "form.block > * input[name=\"c[title_only]\"]",
WT_UNREAD_THREAD_CHECKBOX: "input[type=\"checkbox\"][name=\"unread\"]",
USERNAME_ELEMENT: "a[href=\"/account/\"] > span.p-navgroup-linkText",
USERNAME_INPUT: "input[name=\"login\"]",
WT_FILTER_POPUP_BUTTON: "a.filterBar-menuTrigger", WT_FILTER_POPUP_BUTTON: "a.filterBar-menuTrigger",
WT_NEXT_PAGE: "a.pageNav-jump--next", WT_NEXT_PAGE: "a.pageNav-jump--next",
WT_URLS: "a[href^=\"/threads/\"][data-tp-primary]", WT_URLS: "a[href^=\"/threads/\"][data-tp-primary]",
DOWNLOAD_LINKS_CONTAINER: "span[style=\"font-size: 18px\"]", WT_UNREAD_THREAD_CHECKBOX: "input[type=\"checkbox\"][name=\"unread\"]",
GS_POSTS: "article.message-body:first-child > div.bbWrapper:first-of-type",
GS_RESULT_THREAD_TITLE: "h3.contentRow-title > a",
GS_RESULT_BODY: "div.contentRow-main", GS_RESULT_BODY: "div.contentRow-main",
GS_MEMBERSHIP: "li > a:not(.username)", GS_MEMBERSHIP: "li > a:not(.username)",
GT_LAST_CHANGELOG: "div.bbCodeBlock-content > div:first-of-type", GET_REQUEST_TOKEN: "input[name=\"_xfToken\"]",
LOGIN_BUTTON: "button.button--icon--login",
LOGIN_MESSAGE_ERROR: "div.blockMessage.blockMessage--error.blockMessage--iconic",
PASSWORD_INPUT: "input[name=\"password\"]",
USERNAME_ELEMENT: "a[href=\"/account/\"] > span.p-navgroup-linkText",
USERNAME_INPUT: "input[name=\"login\"]",
AVATAR_INFO: "span.avatar",
AVATAR_PIC: "a[href=\"/account/\"] > span.avatar > img[class^=\"avatar\"]",
FILTER_THREADS_BUTTON: "button[class=\"button--primary button\"]",
}); });

View File

@ -13,14 +13,24 @@ const tough = require("tough-cookie");
// Modules from file // Modules from file
const shared = require("./shared.js"); const shared = require("./shared.js");
const f95url = require("./constants/url.js"); const f95url = require("./constants/url.js");
const f95selector = require("./constants/css-selector.js");
const LoginResult = require("./classes/login-result.js");
// Global variables // Global variables
const userAgent = const userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) " +
"Mozilla/5.0 (X11; Linux x86_64)" + "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Safari/605.1.15";
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36";
axiosCookieJarSupport(axios); axiosCookieJarSupport(axios);
const cookieJar = new tough.CookieJar(); const cookieJar = new tough.CookieJar();
// Request options shared by the axios calls of this module:
// the same object is passed to `axios.get` and to the login `axios.post`,
// so every request sends the same headers and uses the same cookie jar.
const commonConfig = {
headers: {
"User-Agent": userAgent,
"Connection": "keep-alive"
},
withCredentials: true,
jar: cookieJar // Used to store the token in the PC
};
/** /**
* @protected * @protected
* Gets the HTML code of a page. * Gets the HTML code of a page.
@ -28,19 +38,13 @@ const cookieJar = new tough.CookieJar();
* @returns {Promise<String>} HTML code or `null` if an error arise * @returns {Promise<String>} HTML code or `null` if an error arise
*/ */
module.exports.fetchHTML = async function (url) { module.exports.fetchHTML = async function (url) {
try { // Fetch the response of the platform
const response = await axios.get(url, { const response = await exports.fetchGETResponse(url);
headers: { if (!response) {
"User-Agent": userAgent shared.logger.warn(`Unable to fetch HTML for ${url}`);
},
withCredentials: true,
jar: cookieJar
});
return response.data;
} catch (e) {
shared.logger.error(`Error ${e.message} occurred while trying to fetch ${url}`);
return null; return null;
} }
return response.data;
}; };
/** /**
@ -49,12 +53,21 @@ module.exports.fetchHTML = async function (url) {
* and token obtained previously. Save cookies on your * and token obtained previously. Save cookies on your
* device after authentication. * device after authentication.
* @param {Credentials} credentials Platform access credentials * @param {Credentials} credentials Platform access credentials
* @returns {Promise<Boolean>} Result of the operation * @returns {Promise<LoginResult>} Result of the operation
*/ */
module.exports.autenticate = async function (credentials) { module.exports.autenticate = async function (credentials) {
shared.logger.info(`Authenticating with user ${credentials.username}`); shared.logger.info(`Authenticating with user ${credentials.username}`);
if (!credentials.token) throw new Error(`Invalid token for auth: ${credentials.token}`); if (!credentials.token) throw new Error(`Invalid token for auth: ${credentials.token}`);
// If the user is already logged, return
if(shared.isLogged) {
shared.logger.warn(`${credentials.username} already authenticated`);
return new LoginResult(true, "Already authenticated");
}
// Secure the URL
const secureURL = exports.enforceHttpsUrl(f95url.F95_LOGIN_URL);
// Prepare the parameters to send to the platform to authenticate // Prepare the parameters to send to the platform to authenticate
const params = new URLSearchParams(); const params = new URLSearchParams();
params.append("login", credentials.username); params.append("login", credentials.username);
@ -67,22 +80,23 @@ module.exports.autenticate = async function (credentials) {
params.append("website_code", ""); params.append("website_code", "");
params.append("_xfToken", credentials.token); params.append("_xfToken", credentials.token);
const config = {
headers: {
"User-Agent": userAgent,
"Content-Type": "application/x-www-form-urlencoded",
"Connection": "keep-alive"
},
withCredentials: true,
jar: cookieJar // Retrieve the stored cookies! What a pain to understand that this is a MUST!
};
try { try {
await axios.post(f95url.F95_LOGIN_URL, params, config); // Try to log-in
return true; const response = await axios.post(secureURL, params, commonConfig);
// Parse the response HTML
const $ = cheerio.load(response.data);
// Get the error message (if any) and remove the new line chars
const errorMessage = $("body").find(f95selector.LOGIN_MESSAGE_ERROR).text().replace(/\n/g, "");
// Return the result of the authentication
shared.isLogged = errorMessage === "";
if (errorMessage === "") return new LoginResult(true, "Authentication successful");
else return new LoginResult(false, errorMessage);
} catch (e) { } catch (e) {
shared.logger.error(`Error ${e.message} occurred while authenticating to ${f95url.F95_LOGIN_URL}`); shared.logger.error(`Error ${e.message} occurred while authenticating to ${secureURL}`);
return false; return new LoginResult(false, `Error ${e.message} while authenticating`);
} }
}; };
@ -91,25 +105,63 @@ module.exports.autenticate = async function (credentials) {
* @returns {Promise<String>} Token or `null` if an error arise * @returns {Promise<String>} Token or `null` if an error arise
*/ */
module.exports.getF95Token = async function() { module.exports.getF95Token = async function() {
// Fetch the response of the platform
const response = await exports.fetchGETResponse(f95url.F95_LOGIN_URL);
if (!response) {
shared.logger.warn("Unable to get the token for the session");
return null;
}
// The response is a HTML page, we need to find the <input> with name "_xfToken"
const $ = cheerio.load(response.data);
const token = $("body").find(f95selector.GET_REQUEST_TOKEN).attr("value");
return token;
};
/**
 * @protected
 * Gets the basic data used for game data processing
 * (such as graphics engines and progress statuses) from the
 * "latest updates" page and adds every value that is not already
 * known to `shared.engines` and `shared.statuses`.
 * @deprecated
 * @returns {Promise<void>} Resolves once the shared lists are updated,
 * or right away (after logging a warning) if the page cannot be fetched
 */
module.exports.fetchPlatformData = async function() {
    // Fetch the response of the platform
    const response = await exports.fetchGETResponse(f95url.F95_LATEST_UPDATES);
    if (!response) {
        shared.logger.warn("Unable to get the platform data (engines and statuses)");
        return;
    }

    // The response is a HTML page, we need to find
    // the base data, used when scraping the games
    const $ = cheerio.load(response.data);
    const pageBody = $("body");

    // Extract the elements
    const engineElements = pageBody.find(f95selector.BD_ENGINE_ID_SELECTOR);
    const statusesElements = pageBody.find(f95selector.BD_STATUS_ID_SELECTOR);

    // Extract the raw text, skipping the values already stored so that
    // repeated calls (or pre-populated lists) do not produce duplicates
    engineElements.each(function extractEngineNames(idx, el) {
        const engine = cheerio.load(el).text().trim();
        if (!shared.engines.includes(engine)) shared.engines.push(engine);
    });

    statusesElements.each(function extractStatusNames(idx, el) {
        const status = cheerio.load(el).text().trim();
        if (!shared.statuses.includes(status)) shared.statuses.push(status);
    });
};
//#region Utility methods
module.exports.fetchGETResponse = async function(url) {
// Secure the URL
const secureURL = exports.enforceHttpsUrl(url);
try { try {
const config = { // Fetch and return the response
headers: { return await axios.get(secureURL, commonConfig);
"User-Agent": userAgent,
"Connection": "keep-alive"
},
withCredentials: true,
jar: cookieJar // Used to store the token in the PC
};
// Fetch the response of the platform
const response = await axios.get(f95url.F95_LOGIN_URL, config);
// The response is a HTML page, we need to find the <input> with name "_xfToken"
const $ = cheerio.load(response.data);
const token = $("body").find("input[name='_xfToken']").attr("value");
return token;
} catch (e) { } catch (e) {
shared.logger.error(`Error ${e.message} occurred while trying to fetch F95 token`); shared.logger.error(`Error ${e.message} occurred while trying to fetch ${secureURL}`);
return null; return null;
} }
}; };
@ -121,8 +173,7 @@ module.exports.getF95Token = async function() {
* @returns {String} * @returns {String}
*/ */
module.exports.enforceHttpsUrl = function (url) { module.exports.enforceHttpsUrl = function (url) {
const value = _.isString(url) ? url.replace(/^(https?:)?\/\//, "https://") : null; return _.isString(url) ? url.replace(/^(https?:)?\/\//, "https://") : null;
return value;
}; };
/** /**
@ -186,4 +237,5 @@ module.exports.urlExists = async function (url, checkRedirect) {
module.exports.getUrlRedirect = async function (url) { module.exports.getUrlRedirect = async function (url) {
const response = await ky.head(url); const response = await ky.head(url);
return response.url; return response.url;
}; };
//#endregion Utility methods

View File

@ -27,33 +27,33 @@ module.exports.getGameInfo = async function (url) {
// Extract data // Extract data
const titleData = extractInfoFromTitle(body); const titleData = extractInfoFromTitle(body);
console.log(titleData);
const tags = extractTags(body); const tags = extractTags(body);
console.log(tags); const prefixesData = parseGamePrefixes(body);
const mainPostData = extractInfoFromMainPost(mainPost); const src = extractPreviewSource(body);
console.log(mainPostData); const changelog = extractChangelog(mainPost);
const structuredData = extractStructuredData(body); const structuredData = extractStructuredData(body);
const parsedInfos = parseMainPostText(structuredData["description"]);
const overview = getOverview(structuredData["description"], prefixesData.mod);
// Obtain the updated URL // Obtain the updated URL
const redirectUrl = await getUrlRedirect(url); const redirectUrl = await getUrlRedirect(url);
// TODO: Check to change
const parsedInfos = parseMainPostText(mainPost.text());
const overview = getOverview(mainPost.text(), info.isMod);
// Fill in the GameInfo element with the information obtained // Fill in the GameInfo element with the information obtained
const info = new GameInfo(); const info = new GameInfo();
info.name = titleData.name; info.name = titleData.name;
info.author = titleData.author; info.author = titleData.author;
info.isMod = titleData.mod; info.isMod = prefixesData.mod;
info.engine = titleData.engine; info.engine = prefixesData.engine;
info.status = titleData.status; info.status = prefixesData.status;
info.tags = tags; info.tags = tags;
info.url = redirectUrl; info.url = redirectUrl;
info.language = parsedInfos.Language;
info.overview = overview; info.overview = overview;
info.lastUpdate = titleData.mod ? parsedInfos.UPDATED : parsedInfos.THREAD_UPDATED; info.supportedOS = parsedInfos.SupportedOS;
info.previewSource = mainPostData.previewSource; info.censored = parsedInfos.Censored;
info.changelog = mainPostData.changelog; info.lastUpdate = parsedInfos.LastUpdate;
info.previewSrc = src;
info.changelog = changelog;
info.version = titleData.version; info.version = titleData.version;
shared.logger.info(`Founded data for ${info.name}`); shared.logger.info(`Founded data for ${info.name}`);
@ -63,41 +63,86 @@ module.exports.getGameInfo = async function (url) {
//#region Private methods //#region Private methods
/** /**
* @private * @private
* Extracts all the possible informations from the title, including the prefixes. * Parse the game prefixes obtaining the engine used,
* the advancement status and if the game is actually a game or a mod.
* @param {cheerio.Cheerio} body Page `body` selector
* @returns {Object} Dictionary of values
*/
function parseGamePrefixes(body) {
    shared.logger.trace("Parsing prefixes...");

    // Values reported when the title carries no matching prefix
    const parsed = {
        engine: null,
        status: null,
        mod: false
    };

    // Walk through every prefix element found in the title
    body.find(f95Selector.GT_TITLE_PREFIXES).each(function parseGamePrefix(idx, el) {
        // Prefix text without the surrounding square brackets
        const rawText = cheerio.load(el).text().trim();
        const label = rawText.replace("[", "").replace("]", "");

        // A prefix is classified as exactly one of: engine, status, mod marker
        if (isEngine(label)) {
            parsed.engine = label;
        } else if (isStatus(label)) {
            parsed.status = label;
        } else if (isMod(label)) {
            parsed.mod = true;
        }
    });

    // If the status is not set, then the game is in development (Ongoing)
    if (!parsed.status) parsed.status = "Ongoing";

    return parsed;
}
/**
* @private
* Extracts all the possible informations from the title.
* @param {cheerio.Cheerio} body Page `body` selector * @param {cheerio.Cheerio} body Page `body` selector
* @returns {Object} Dictionary of values * @returns {Object} Dictionary of values
*/ */
function extractInfoFromTitle(body) { function extractInfoFromTitle(body) {
shared.logger.trace("Extracting information from title...");
const title = body const title = body
.find(f95Selector.GT_TITLE) .find(f95Selector.GT_TITLE)
.text() .text()
.trim(); .trim();
// From the title we can extract: Name, author and version // From the title we can extract: Name, author and version
// TITLE [VERSION] [AUTHOR] // [PREFIXES] TITLE [VERSION] [AUTHOR]
const matches = title.match(/\[(.*?)\]/g); const matches = title.match(/\[(.*?)\]/g);
const endIndex = title.indexOf("["); // The open bracket of the version
const name = title.substring(0, endIndex).trim();
const version = matches[0].trim();
const author = matches[1].trim();
// Parse the title prefixes // Get the title name
const prefixeElements = body.find(f95Selector.GT_TITLE_PREFIXES); let name = title;
let mod = false, engine = null, status = null; matches.forEach(function replaceElementsInTitle(e) {
prefixeElements.each(function parseGamePrefixes(el) { name = name.replace(e, "");
const prefix = el.text().trim();
if(isEngine(prefix)) engine = prefix;
else if(isStatus(prefix)) status = prefix;
else if (isMod(prefix)) mod = true;
}); });
name = name.trim();
// The regex [[\]]+ remove the square brackets
// The version is the penultimate element.
// If there are fewer than 2 matches, then the title
// is malformed and only the author is fetched
// (usually the author is always present)
let version = null;
if (matches.length >= 2) version = matches[matches.length - 2].replace(/[[\]]+/g, "").trim();
else shared.logger.trace(`Malformed title: ${title}`);
// Last element
const author = matches[matches.length - 1].replace(/[[\]]+/g, "").trim();
return { return {
name, name,
version, version,
author, author,
engine,
status,
mod
}; };
} }
@ -108,32 +153,49 @@ function extractInfoFromTitle(body) {
* @returns {String[]} List of tags * @returns {String[]} List of tags
*/ */
function extractTags(body) { function extractTags(body) {
shared.logger.trace("Extracting tags...");
// Get the game tags // Get the game tags
const tagResults = body.find(f95Selector.GT_TAGS); const tagResults = body.find(f95Selector.GT_TAGS);
return tagResults.map((idx, el) => { return tagResults.map(function parseGameTags(idx, el) {
return el.text().trim(); return cheerio.load(el).text().trim();
}).get(); }).get();
} }
/** /**
* @private * @private
* Extracts the name of the game, its author and its current version from the title of the page. * Gets the URL of the image used as a preview.
* @param {cheerio.Cheerio} mainPost Selector of the main post * @param {cheerio.Cheerio} body Page `body` selector
* @returns {Object} Dictionary of values * @returns {String} URL of the image
*/ */
function extractInfoFromMainPost(mainPost) { function extractPreviewSource(body) {
// Get the preview image shared.logger.trace("Extracting image preview source...");
const previewElement = mainPost.find(f95Selector.GT_IMAGES); const image = body.find(f95Selector.GT_IMAGES);
const previewSource = previewElement ? previewElement.first().attr("src") : null;
// Get the latest changelog
const changelogElement = mainPost.find(f95Selector.GT_LAST_CHANGELOG);
const changelog = changelogElement ? changelogElement.text().trim() : null;
return { // The "src" attribute is rendered only in a second moment,
previewSource, // we need the "static" src value saved in the attribute "data-src"
changelog const source = image ? image.attr("data-src") : null;
}; return source;
}
/**
 * @private
 * Gets the changelog of the latest version.
 * The text is cleaned by removing the first "Spoiler" label, collapsing
 * consecutive newlines and trimming the surrounding whitespace.
 * @param {cheerio.Cheerio} mainPost main post selector
 * @returns {String} Changelog of the last version or `null` if no changelog is fetched
 */
function extractChangelog(mainPost) {
    shared.logger.trace("Extracting last changelog...");

    // Obtain the raw changelog text
    const rawText = mainPost.find(f95Selector.GT_LAST_CHANGELOG).text();

    // Clean the changelog. The final trim is needed because removing the
    // "Spoiler" label can leave leading newlines/spaces behind, which would
    // otherwise be returned as part of (or as the whole) changelog
    const changelog = rawText
        .trim()
        .replace("Spoiler", "")
        .replace(/\n+/g, "\n")
        .trim();

    // An empty result means that no changelog is available
    return changelog ? changelog : null;
}
/** /**
@ -144,7 +206,9 @@ function extractInfoFromMainPost(mainPost) {
* @returns {Object} Dictionary of information * @returns {Object} Dictionary of information
*/ */
function parseMainPostText(text) { function parseMainPostText(text) {
const dataPairs = {}; shared.logger.trace("Parsing main post raw text...");
const data = {};
// The information searched in the game post are one per line // The information searched in the game post are one per line
const splittedText = text.split("\n"); const splittedText = text.split("\n");
@ -157,28 +221,80 @@ function parseMainPostText(text) {
const value = splitted[1].trim(); const value = splitted[1].trim();
// Add pair to the dict if valid // Add pair to the dict if valid
if (value !== "") dataPairs[key] = value; if (value !== "") data[key] = value;
} }
return dataPairs; // Parse the standard pairs
const parsedDict = {};
// Check if the game is censored
if (data.CENSORED) {
const censored = data.CENSORED.toUpperCase() === "NO" ? false : true;
parsedDict["Censored"] = censored;
delete data.CENSORED;
}
// Last update of the main post
if (data.UPDATED) {
parsedDict["LastUpdate"] = new Date(data.UPDATED);
delete data.UPDATED;
}
else if (data.THREAD_UPDATED) {
parsedDict["LastUpdate"] = new Date(data.THREAD_UPDATED);
delete data.THREAD_UPDATED;
}
// Parse the supported OS
if (data.OS) {
const listOS = [];
// Usually the string is something like "Windows, Linux, Mac"
const splitted = data.OS.split(",");
splitted.forEach(function (os) {
listOS.push(os.trim());
});
parsedDict["SupportedOS"] = listOS;
delete data.OS;
}
// Rename the key for the language
if (data.LANGUAGE) {
parsedDict["Language"] = data.LANGUAGE;
delete data.LANGUAGE;
}
// What remains is added to a sub dictionary
parsedDict["Various"] = data;
return parsedDict;
} }
/** /**
* @private * @private
* Extracts and processes the JSON-LD values found at the bottom of the page. * Extracts and processes the JSON-LD values found at the bottom of the page.
* @param {cheerio.Cheerio} body Page `body` selector * @param {cheerio.Cheerio} body Page `body` selector
* @returns ??? * @returns {Object} JSON-LD or `null` if no valid JSON is found
*/ */
function extractStructuredData(body) { function extractStructuredData(body) {
const structuredDataElements = body.find("..."); shared.logger.trace("Extracting JSON-LD data...");
for (const el in structuredDataElements) { const structuredDataElements = body.find(f95Selector.GT_JSONLD);
for (const child in structuredDataElements[el].children) { const json = structuredDataElements.map(function parseScriptTag(idx, el) {
const data = structuredDataElements[el].children[child].data; // Get the element HTML
console.log(data); const html = cheerio.load(el).html().trim();
// TODO: The @type should be "Book"
// TODO: Test here // Obtain the JSON-LD
} const data = html
} .replace("<script type=\"application/ld+json\">", "")
.replace("</script>", "");
// Convert the string to an object
const json = JSON.parse(data);
// Return only the data of the game
if (json["@type"] === "Book") return json;
}).get();
return json[0] ? json[0] : null;
} }
/** /**
@ -190,6 +306,7 @@ function extractStructuredData(body) {
* @returns {Promise<String>} Game description * @returns {Promise<String>} Game description
*/ */
function getOverview(text, mod) { function getOverview(text, mod) {
shared.logger.trace("Extracting game overview...");
// Get overview (different parsing for game and mod) // Get overview (different parsing for game and mod)
const overviewEndIndex = mod ? text.indexOf("Updated") : text.indexOf("Thread Updated"); const overviewEndIndex = mod ? text.indexOf("Updated") : text.indexOf("Thread Updated");
return text.substring(0, overviewEndIndex).replace("Overview:\n", "").trim(); return text.substring(0, overviewEndIndex).replace("Overview:\n", "").trim();
@ -235,6 +352,9 @@ function isMod(prefix) {
* @returns {String[]} * @returns {String[]}
*/ */
function toUpperCaseArray(a) { function toUpperCaseArray(a) {
// If the array is empty, return
if(a.length === 0) return [];
/** /**
* Makes a string uppercase. * Makes a string uppercase.
* @param {String} s * @param {String} s

View File

@ -57,7 +57,7 @@ module.exports.searchMod = async function (name) {
* @return {Promise<String[]>} List of URLs * @return {Promise<String[]>} List of URLs
*/ */
async function fetchResultURLs(url) { async function fetchResultURLs(url) {
shared.logger.info(`Fetching ${url}...`); shared.logger.trace(`Fetching ${url}...`);
// Fetch HTML and prepare Cheerio // Fetch HTML and prepare Cheerio
const html = await fetchHTML(url); const html = await fetchHTML(url);
@ -82,6 +82,8 @@ async function fetchResultURLs(url) {
* @returns {String} URL to thread * @returns {String} URL to thread
*/ */
function extractLinkFromResult(selector) { function extractLinkFromResult(selector) {
shared.logger.trace("Extracting thread link from result...");
const partialLink = selector const partialLink = selector
.find(f95Selector.GS_RESULT_THREAD_TITLE) .find(f95Selector.GS_RESULT_THREAD_TITLE)
.attr("href") .attr("href")

View File

@ -1,8 +1,6 @@
"use strict"; "use strict";
// Core modules // Public modules from npm
const { join } = require("path");
const log4js = require("log4js"); const log4js = require("log4js");
/** /**
@ -10,164 +8,88 @@ const log4js = require("log4js");
*/ */
class Shared { class Shared {
//#region Properties //#region Properties
/** /**
* Shows log messages and other useful functions for module debugging. * Shows log messages and other useful functions for module debugging.
* @type Boolean * @type Boolean
*/ */
static #_debug = false; static #_debug = false;
/** /**
* Indicates whether a user is logged in to the F95Zone platform or not. * Indicates whether a user is logged in to the F95Zone platform or not.
* @type Boolean * @type Boolean
*/ */
static #_isLogged = false; static #_isLogged = false;
/** /**
* List of cookies obtained from the F95Zone platform. * List of possible game engines used for development.
* @type Object[] * @type String[]
*/ */
static #_cookies = null; static #_engines = ["ADRIFT", "Flash", "HTML", "Java", "Others", "QSP", "RAGS", "RPGM", "Ren'Py", "Tads", "Unity", "Unreal Engine", "WebGL", "Wolf RPG"];
/** /**
* List of possible game engines used for development. * List of possible development statuses that a game can assume.
* @type String[] * @type String[]
*/ */
static #_engines = null; static #_statuses = ["Completed", "Onhold", "Abandoned"];
/** /**
* List of possible development statuses that a game can assume. * Logger object used to write to both file and console.
* @type String[] * @type log4js.Logger
*/ */
static #_statuses = null; static #_logger = log4js.getLogger();
/** //#endregion Properties
* Wait instruction for the browser created by puppeteer.
* @type String
*/
static WAIT_STATEMENT = "domcontentloaded";
/**
* Path to the directory to save the cache generated by the API.
* @type String
*/
static #_cacheDir = "./f95cache";
/**
* If true, it opens a new browser for each request to
* the F95Zone platform, otherwise it reuses the same.
* @type Boolean
*/
static #_isolation = false;
/**
* Logger object used to write to both file and console.
* @type log4js.Logger
*/
static #_logger = log4js.getLogger();
//#endregion Properties
//#region Getters //#region Getters
/** /**
* Shows log messages and other useful functions for module debugging. * Shows log messages and other useful functions for module debugging.
* @returns {Boolean} * @returns {Boolean}
*/ */
static get debug() { static get debug() {
return this.#_debug; return this.#_debug;
} }
/** /**
* Indicates whether a user is logged in to the F95Zone platform or not. * Indicates whether a user is logged in to the F95Zone platform or not.
* @returns {Boolean} * @returns {Boolean}
*/ */
static get isLogged() { static get isLogged() {
return this.#_isLogged; return this.#_isLogged;
} }
/** /**
* List of cookies obtained from the F95Zone platform.
* @returns {Object[]}
*/
static get cookies() {
return this.#_cookies;
}
/**
* List of possible game engines used for development. * List of possible game engines used for development.
* @returns {String[]} * @returns {String[]}
*/ */
static get engines() { static get engines() {
return this.#_engines; return this.#_engines;
} }
/** /**
* List of possible development states that a game can assume. * List of possible development states that a game can assume.
* @returns {String[]} * @returns {String[]}
*/ */
static get statuses() { static get statuses() {
return this.#_statuses; return this.#_statuses;
} }
/** /**
* Directory to save the API cache.
* @returns {String}
*/
static get cacheDir() {
return this.#_cacheDir;
}
/**
* Path to the F95 platform cache.
* @returns {String}
*/
static get cookiesCachePath() {
return join(this.#_cacheDir, "cookies.json");
}
/**
* Path to the game engine cache.
* @returns {String}
*/
static get enginesCachePath() {
return join(this.#_cacheDir, "engines.json");
}
/**
* Path to the cache of possible game states.
* @returns {String}
*/
static get statusesCachePath() {
return join(this.#_cacheDir, "statuses.json");
}
/**
* If true, it opens a new browser for each request
* to the F95Zone platform, otherwise it reuses the same.
* @returns {Boolean}
*/
static get isolation() {
return this.#_isolation;
}
/**
* Logger object used to write to both file and console. * Logger object used to write to both file and console.
* @returns {log4js.Logger} * @returns {log4js.Logger}
*/ */
static get logger() { static get logger() {
return this.#_logger; return this.#_logger;
} }
//#endregion Getters //#endregion Getters
//#region Setters //#region Setters
static set cookies(val) { static set engines(val) {
this.#_cookies = val; this.#_engines = val;
} }
static set engines(val) { static set statuses(val) {
this.#_engines = val; this.#_statuses = val;
} }
static set statuses(val) { static set debug(val) {
this.#_statuses = val; this.#_debug = val;
} }
static set cacheDir(val) { static set isLogged(val) {
this.#_cacheDir = val; this.#_isLogged = val;
} }
//#endregion Setters
static set debug(val) {
this.#_debug = val;
}
static set isLogged(val) {
this.#_isLogged = val;
}
static set isolation(val) {
this.#_isolation = val;
}
//#endregion Setters
} }
module.exports = Shared; module.exports = Shared;

View File

@ -12,27 +12,45 @@ const networkHelper = require("../app/scripts/network-helper.js");
// Configure the .env reader // Configure the .env reader
dotenv.config(); dotenv.config();
// Search for Kingdom Of Deception data // Login
searchKOD(); auth().then(async function searchGames(result) {
if(!result) return;
async function searchKOD() { // Search for Kingdom Of Deception data
await search("kingdom of deception");
// Search for Perverted Education data
await search("perverted education");
// Search for Corrupted Kingdoms data
await search("corrupted kingdoms");
// Search for Summertime Saga data
await search("summertime saga");
});
async function auth() {
console.log("Token fetch..."); console.log("Token fetch...");
const creds = new Credentials(process.env.F95_USERNAME, process.env.F95_PASSWORD); const creds = new Credentials(process.env.F95_USERNAME, process.env.F95_PASSWORD);
await creds.fetchToken(); await creds.fetchToken();
console.log(`Token obtained: ${creds.token}`); console.log(`Token obtained: ${creds.token}`);
console.log("Authenticating..."); console.log("Authenticating...");
const authenticated = await networkHelper.autenticate(creds); const result = await networkHelper.autenticate(creds);
console.log(`Authentication result: ${authenticated}`); console.log(`Authentication result: ${result.message}`);
return result.success;
}
console.log("Searching KOD..."); async function search(gamename) {
const urls = await searcher.searchGame("kingdom of deception", creds); console.log(`Searching '${gamename}'...`);
const urls = await searcher.searchGame(gamename);
console.log(`Found: ${urls}`); console.log(`Found: ${urls}`);
console.log("Scraping data..."); console.log("Scraping data...");
for (const url of urls) { for (const url of urls) {
const gamedata = await scraper.getGameInfo(url); const gamedata = await scraper.getGameInfo(url);
console.log(gamedata); console.log(`Found ${gamedata.name} (${gamedata.version}) by ${gamedata.author}`);
} }
console.log("Scraping completed!"); console.log("Scraping completed!");
} }