Better logging, completed game scraping, remove unused values
parent
ecfd1784b1
commit
1391f28ad1
|
@ -23,6 +23,22 @@ class GameInfo {
|
||||||
* @type String
|
* @type String
|
||||||
*/
|
*/
|
||||||
this.overview = null;
|
this.overview = null;
|
||||||
|
/**
|
||||||
|
* Game language.
|
||||||
|
* @type String
|
||||||
|
*/
|
||||||
|
this.language = null;
|
||||||
|
/**
|
||||||
|
* List of supported OS.
|
||||||
|
* @type String[]
|
||||||
|
*/
|
||||||
|
this.supportedOS = [];
|
||||||
|
/**
|
||||||
|
* Specify whether the game has censorship
|
||||||
|
* measures regarding NSFW scenes.
|
||||||
|
* @type Boolean
|
||||||
|
*/
|
||||||
|
this.censored = null;
|
||||||
/**
|
/**
|
||||||
* List of tags associated with the game
|
* List of tags associated with the game
|
||||||
* @type String[]
|
* @type String[]
|
||||||
|
@ -86,6 +102,9 @@ class GameInfo {
|
||||||
author: this.author,
|
author: this.author,
|
||||||
url: this.url,
|
url: this.url,
|
||||||
overview: this.overview,
|
overview: this.overview,
|
||||||
|
language: this.language,
|
||||||
|
supportedOS: this.supportedOS,
|
||||||
|
censored: this.censored,
|
||||||
engine: this.engine,
|
engine: this.engine,
|
||||||
status: this.status,
|
status: this.status,
|
||||||
previewSrc: this.previewSrc,
|
previewSrc: this.previewSrc,
|
||||||
|
|
|
@ -1,31 +1,28 @@
|
||||||
module.exports = Object.freeze({
|
module.exports = Object.freeze({
|
||||||
AVATAR_INFO: "span.avatar",
|
BD_ENGINE_ID_SELECTOR: "div[id^=\"btn-prefix_1_\"]>span",
|
||||||
AVATAR_PIC: "a[href=\"/account/\"] > span.avatar > img[class^=\"avatar\"]",
|
BD_STATUS_ID_SELECTOR: "div[id^=\"btn-prefix_4_\"]>span",
|
||||||
ENGINE_ID_SELECTOR: "div[id^=\"btn-prefix_1_\"]>span",
|
GT_IMAGES: "img:not([title])[data-src^=\"https://attachments.f95zone.to\"][data-url=\"\"]",
|
||||||
FILTER_THREADS_BUTTON: "button[class=\"button--primary button\"]",
|
|
||||||
GT_IMAGES: "img[src^=\"https://attachments.f95zone.to\"]",
|
|
||||||
GT_TAGS: "a.tagItem",
|
GT_TAGS: "a.tagItem",
|
||||||
GT_TITLE: "h1.p-title-value",
|
GT_TITLE: "h1.p-title-value",
|
||||||
GT_TITLE_PREFIXES: "h1.p-title-value > a.labelLink > span[dir=\"auto\"]",
|
GT_TITLE_PREFIXES: "h1.p-title-value > a.labelLink > span[dir=\"auto\"]",
|
||||||
LOGIN_BUTTON: "button.button--icon--login",
|
GT_LAST_CHANGELOG: "div.bbCodeBlock-content > div:first-of-type",
|
||||||
LOGIN_MESSAGE_ERROR: "div.blockMessage.blockMessage--error.blockMessage--iconic",
|
GT_JSONLD: "script[type=\"application/ld+json\"]",
|
||||||
ONLY_GAMES_THREAD_OPTION: "select[name=\"nodes[]\"] > option[value=\"2\"]",
|
|
||||||
PASSWORD_INPUT: "input[name=\"password\"]",
|
|
||||||
SEARCH_BUTTON: "form.block > * button.button--icon--search",
|
|
||||||
SEARCH_FORM_TEXTBOX: "input[name=\"keywords\"][type=\"search\"]",
|
|
||||||
SEARCH_ONLY_GAMES_OPTION: "select[name=\"c[nodes][]\"] > option[value=\"1\"]",
|
|
||||||
STATUS_ID_SELECTOR: "div[id^=\"btn-prefix_4_\"]>span",
|
|
||||||
GS_POSTS: "article.message-body:first-child > div.bbWrapper:first-of-type",
|
|
||||||
GS_RESULT_THREAD_TITLE: "h3.contentRow-title > a",
|
|
||||||
TITLE_ONLY_CHECKBOX: "form.block > * input[name=\"c[title_only]\"]",
|
|
||||||
WT_UNREAD_THREAD_CHECKBOX: "input[type=\"checkbox\"][name=\"unread\"]",
|
|
||||||
USERNAME_ELEMENT: "a[href=\"/account/\"] > span.p-navgroup-linkText",
|
|
||||||
USERNAME_INPUT: "input[name=\"login\"]",
|
|
||||||
WT_FILTER_POPUP_BUTTON: "a.filterBar-menuTrigger",
|
WT_FILTER_POPUP_BUTTON: "a.filterBar-menuTrigger",
|
||||||
WT_NEXT_PAGE: "a.pageNav-jump--next",
|
WT_NEXT_PAGE: "a.pageNav-jump--next",
|
||||||
WT_URLS: "a[href^=\"/threads/\"][data-tp-primary]",
|
WT_URLS: "a[href^=\"/threads/\"][data-tp-primary]",
|
||||||
DOWNLOAD_LINKS_CONTAINER: "span[style=\"font-size: 18px\"]",
|
WT_UNREAD_THREAD_CHECKBOX: "input[type=\"checkbox\"][name=\"unread\"]",
|
||||||
|
GS_POSTS: "article.message-body:first-child > div.bbWrapper:first-of-type",
|
||||||
|
GS_RESULT_THREAD_TITLE: "h3.contentRow-title > a",
|
||||||
GS_RESULT_BODY: "div.contentRow-main",
|
GS_RESULT_BODY: "div.contentRow-main",
|
||||||
GS_MEMBERSHIP: "li > a:not(.username)",
|
GS_MEMBERSHIP: "li > a:not(.username)",
|
||||||
GT_LAST_CHANGELOG: "div.bbCodeBlock-content > div:first-of-type",
|
GET_REQUEST_TOKEN: "input[name=\"_xfToken\"]",
|
||||||
|
|
||||||
|
LOGIN_BUTTON: "button.button--icon--login",
|
||||||
|
LOGIN_MESSAGE_ERROR: "div.blockMessage.blockMessage--error.blockMessage--iconic",
|
||||||
|
PASSWORD_INPUT: "input[name=\"password\"]",
|
||||||
|
USERNAME_ELEMENT: "a[href=\"/account/\"] > span.p-navgroup-linkText",
|
||||||
|
USERNAME_INPUT: "input[name=\"login\"]",
|
||||||
|
AVATAR_INFO: "span.avatar",
|
||||||
|
AVATAR_PIC: "a[href=\"/account/\"] > span.avatar > img[class^=\"avatar\"]",
|
||||||
|
FILTER_THREADS_BUTTON: "button[class=\"button--primary button\"]",
|
||||||
});
|
});
|
||||||
|
|
|
@ -13,14 +13,24 @@ const tough = require("tough-cookie");
|
||||||
// Modules from file
|
// Modules from file
|
||||||
const shared = require("./shared.js");
|
const shared = require("./shared.js");
|
||||||
const f95url = require("./constants/url.js");
|
const f95url = require("./constants/url.js");
|
||||||
|
const f95selector = require("./constants/css-selector.js");
|
||||||
|
const LoginResult = require("./classes/login-result.js");
|
||||||
|
|
||||||
// Global variables
|
// Global variables
|
||||||
const userAgent =
|
const userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) " +
|
||||||
"Mozilla/5.0 (X11; Linux x86_64)" +
|
"AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Safari/605.1.15";
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36";
|
|
||||||
axiosCookieJarSupport(axios);
|
axiosCookieJarSupport(axios);
|
||||||
const cookieJar = new tough.CookieJar();
|
const cookieJar = new tough.CookieJar();
|
||||||
|
|
||||||
|
const commonConfig = {
|
||||||
|
headers: {
|
||||||
|
"User-Agent": userAgent,
|
||||||
|
"Connection": "keep-alive"
|
||||||
|
},
|
||||||
|
withCredentials: true,
|
||||||
|
jar: cookieJar // Used to store the token in the PC
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @protected
|
* @protected
|
||||||
* Gets the HTML code of a page.
|
* Gets the HTML code of a page.
|
||||||
|
@ -28,19 +38,13 @@ const cookieJar = new tough.CookieJar();
|
||||||
* @returns {Promise<String>} HTML code or `null` if an error arise
|
* @returns {Promise<String>} HTML code or `null` if an error arise
|
||||||
*/
|
*/
|
||||||
module.exports.fetchHTML = async function (url) {
|
module.exports.fetchHTML = async function (url) {
|
||||||
try {
|
// Fetch the response of the platform
|
||||||
const response = await axios.get(url, {
|
const response = await exports.fetchGETResponse(url);
|
||||||
headers: {
|
if (!response) {
|
||||||
"User-Agent": userAgent
|
shared.logger.warn(`Unable to fetch HTML for ${url}`);
|
||||||
},
|
|
||||||
withCredentials: true,
|
|
||||||
jar: cookieJar
|
|
||||||
});
|
|
||||||
return response.data;
|
|
||||||
} catch (e) {
|
|
||||||
shared.logger.error(`Error ${e.message} occurred while trying to fetch ${url}`);
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
return response.data;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -49,12 +53,21 @@ module.exports.fetchHTML = async function (url) {
|
||||||
* and token obtained previously. Save cookies on your
|
* and token obtained previously. Save cookies on your
|
||||||
* device after authentication.
|
* device after authentication.
|
||||||
* @param {Credentials} credentials Platform access credentials
|
* @param {Credentials} credentials Platform access credentials
|
||||||
* @returns {Promise<Boolean>} Result of the operation
|
* @returns {Promise<LoginResult>} Result of the operation
|
||||||
*/
|
*/
|
||||||
module.exports.autenticate = async function (credentials) {
|
module.exports.autenticate = async function (credentials) {
|
||||||
shared.logger.info(`Authenticating with user ${credentials.username}`);
|
shared.logger.info(`Authenticating with user ${credentials.username}`);
|
||||||
if (!credentials.token) throw new Error(`Invalid token for auth: ${credentials.token}`);
|
if (!credentials.token) throw new Error(`Invalid token for auth: ${credentials.token}`);
|
||||||
|
|
||||||
|
// If the user is already logged, return
|
||||||
|
if(shared.isLogged) {
|
||||||
|
shared.logger.warn(`${credentials.username} already authenticated`);
|
||||||
|
return new LoginResult(true, "Already authenticated");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Secure the URL
|
||||||
|
const secureURL = exports.enforceHttpsUrl(f95url.F95_LOGIN_URL);
|
||||||
|
|
||||||
// Prepare the parameters to send to the platform to authenticate
|
// Prepare the parameters to send to the platform to authenticate
|
||||||
const params = new URLSearchParams();
|
const params = new URLSearchParams();
|
||||||
params.append("login", credentials.username);
|
params.append("login", credentials.username);
|
||||||
|
@ -67,22 +80,23 @@ module.exports.autenticate = async function (credentials) {
|
||||||
params.append("website_code", "");
|
params.append("website_code", "");
|
||||||
params.append("_xfToken", credentials.token);
|
params.append("_xfToken", credentials.token);
|
||||||
|
|
||||||
const config = {
|
|
||||||
headers: {
|
|
||||||
"User-Agent": userAgent,
|
|
||||||
"Content-Type": "application/x-www-form-urlencoded",
|
|
||||||
"Connection": "keep-alive"
|
|
||||||
},
|
|
||||||
withCredentials: true,
|
|
||||||
jar: cookieJar // Retrieve the stored cookies! What a pain to understand that this is a MUST!
|
|
||||||
};
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await axios.post(f95url.F95_LOGIN_URL, params, config);
|
// Try to log-in
|
||||||
return true;
|
const response = await axios.post(secureURL, params, commonConfig);
|
||||||
|
|
||||||
|
// Parse the response HTML
|
||||||
|
const $ = cheerio.load(response.data);
|
||||||
|
|
||||||
|
// Get the error message (if any) and remove the new line chars
|
||||||
|
const errorMessage = $("body").find(f95selector.LOGIN_MESSAGE_ERROR).text().replace(/\n/g, "");
|
||||||
|
|
||||||
|
// Return the result of the authentication
|
||||||
|
shared.isLogged = errorMessage === "";
|
||||||
|
if (errorMessage === "") return new LoginResult(true, "Authentication successful");
|
||||||
|
else return new LoginResult(false, errorMessage);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
shared.logger.error(`Error ${e.message} occurred while authenticating to ${f95url.F95_LOGIN_URL}`);
|
shared.logger.error(`Error ${e.message} occurred while authenticating to ${secureURL}`);
|
||||||
return false;
|
return new LoginResult(false, `Error ${e.message} while authenticating`);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -91,25 +105,63 @@ module.exports.autenticate = async function (credentials) {
|
||||||
* @returns {Promise<String>} Token or `null` if an error arise
|
* @returns {Promise<String>} Token or `null` if an error arise
|
||||||
*/
|
*/
|
||||||
module.exports.getF95Token = async function() {
|
module.exports.getF95Token = async function() {
|
||||||
try {
|
|
||||||
const config = {
|
|
||||||
headers: {
|
|
||||||
"User-Agent": userAgent,
|
|
||||||
"Connection": "keep-alive"
|
|
||||||
},
|
|
||||||
withCredentials: true,
|
|
||||||
jar: cookieJar // Used to store the token in the PC
|
|
||||||
};
|
|
||||||
|
|
||||||
// Fetch the response of the platform
|
// Fetch the response of the platform
|
||||||
const response = await axios.get(f95url.F95_LOGIN_URL, config);
|
const response = await exports.fetchGETResponse(f95url.F95_LOGIN_URL);
|
||||||
|
if (!response) {
|
||||||
|
shared.logger.warn("Unable to get the token for the session");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
// The response is a HTML page, we need to find the <input> with name "_xfToken"
|
// The response is a HTML page, we need to find the <input> with name "_xfToken"
|
||||||
const $ = cheerio.load(response.data);
|
const $ = cheerio.load(response.data);
|
||||||
const token = $("body").find("input[name='_xfToken']").attr("value");
|
const token = $("body").find(f95selector.GET_REQUEST_TOKEN).attr("value");
|
||||||
return token;
|
return token;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @protected
|
||||||
|
* Gets the basic data used for game data processing
|
||||||
|
* (such as graphics engines and progress statuses)
|
||||||
|
* @deprecated
|
||||||
|
*/
|
||||||
|
module.exports.fetchPlatformData = async function() {
|
||||||
|
// Fetch the response of the platform
|
||||||
|
const response = await exports.fetchGETResponse(f95url.F95_LATEST_UPDATES);
|
||||||
|
if (!response) {
|
||||||
|
shared.logger.warn("Unable to get the token for the session");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The response is a HTML page, we need to find
|
||||||
|
// the base data, used when scraping the games
|
||||||
|
const $ = cheerio.load(response.data);
|
||||||
|
|
||||||
|
// Extract the elements
|
||||||
|
const engineElements = $("body").find(f95selector.BD_ENGINE_ID_SELECTOR);
|
||||||
|
const statusesElements = $("body").find(f95selector.BD_STATUS_ID_SELECTOR);
|
||||||
|
|
||||||
|
// Extract the raw text
|
||||||
|
engineElements.each(function extractEngineNames(idx, el) {
|
||||||
|
const engine = cheerio.load(el).text().trim();
|
||||||
|
shared.engines.push(engine);
|
||||||
|
});
|
||||||
|
|
||||||
|
statusesElements.each(function extractEngineNames(idx, el) {
|
||||||
|
const status = cheerio.load(el).text().trim();
|
||||||
|
shared.statuses.push(status);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
//#region Utility methods
|
||||||
|
module.exports.fetchGETResponse = async function(url) {
|
||||||
|
// Secure the URL
|
||||||
|
const secureURL = exports.enforceHttpsUrl(url);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Fetch and return the response
|
||||||
|
return await axios.get(secureURL, commonConfig);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
shared.logger.error(`Error ${e.message} occurred while trying to fetch F95 token`);
|
shared.logger.error(`Error ${e.message} occurred while trying to fetch ${secureURL}`);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -121,8 +173,7 @@ module.exports.getF95Token = async function() {
|
||||||
* @returns {String}
|
* @returns {String}
|
||||||
*/
|
*/
|
||||||
module.exports.enforceHttpsUrl = function (url) {
|
module.exports.enforceHttpsUrl = function (url) {
|
||||||
const value = _.isString(url) ? url.replace(/^(https?:)?\/\//, "https://") : null;
|
return _.isString(url) ? url.replace(/^(https?:)?\/\//, "https://") : null;
|
||||||
return value;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -187,3 +238,4 @@ module.exports.getUrlRedirect = async function (url) {
|
||||||
const response = await ky.head(url);
|
const response = await ky.head(url);
|
||||||
return response.url;
|
return response.url;
|
||||||
};
|
};
|
||||||
|
//#endregion Utility methods
|
|
@ -27,33 +27,33 @@ module.exports.getGameInfo = async function (url) {
|
||||||
|
|
||||||
// Extract data
|
// Extract data
|
||||||
const titleData = extractInfoFromTitle(body);
|
const titleData = extractInfoFromTitle(body);
|
||||||
console.log(titleData);
|
|
||||||
const tags = extractTags(body);
|
const tags = extractTags(body);
|
||||||
console.log(tags);
|
const prefixesData = parseGamePrefixes(body);
|
||||||
const mainPostData = extractInfoFromMainPost(mainPost);
|
const src = extractPreviewSource(body);
|
||||||
console.log(mainPostData);
|
const changelog = extractChangelog(mainPost);
|
||||||
const structuredData = extractStructuredData(body);
|
const structuredData = extractStructuredData(body);
|
||||||
|
const parsedInfos = parseMainPostText(structuredData["description"]);
|
||||||
|
const overview = getOverview(structuredData["description"], prefixesData.mod);
|
||||||
|
|
||||||
// Obtain the updated URL
|
// Obtain the updated URL
|
||||||
const redirectUrl = await getUrlRedirect(url);
|
const redirectUrl = await getUrlRedirect(url);
|
||||||
|
|
||||||
// TODO: Check to change
|
|
||||||
const parsedInfos = parseMainPostText(mainPost.text());
|
|
||||||
const overview = getOverview(mainPost.text(), info.isMod);
|
|
||||||
|
|
||||||
// Fill in the GameInfo element with the information obtained
|
// Fill in the GameInfo element with the information obtained
|
||||||
const info = new GameInfo();
|
const info = new GameInfo();
|
||||||
info.name = titleData.name;
|
info.name = titleData.name;
|
||||||
info.author = titleData.author;
|
info.author = titleData.author;
|
||||||
info.isMod = titleData.mod;
|
info.isMod = prefixesData.mod;
|
||||||
info.engine = titleData.engine;
|
info.engine = prefixesData.engine;
|
||||||
info.status = titleData.status;
|
info.status = prefixesData.status;
|
||||||
info.tags = tags;
|
info.tags = tags;
|
||||||
info.url = redirectUrl;
|
info.url = redirectUrl;
|
||||||
|
info.language = parsedInfos.Language;
|
||||||
info.overview = overview;
|
info.overview = overview;
|
||||||
info.lastUpdate = titleData.mod ? parsedInfos.UPDATED : parsedInfos.THREAD_UPDATED;
|
info.supportedOS = parsedInfos.SupportedOS;
|
||||||
info.previewSource = mainPostData.previewSource;
|
info.censored = parsedInfos.Censored;
|
||||||
info.changelog = mainPostData.changelog;
|
info.lastUpdate = parsedInfos.LastUpdate;
|
||||||
|
info.previewSrc = src;
|
||||||
|
info.changelog = changelog;
|
||||||
info.version = titleData.version;
|
info.version = titleData.version;
|
||||||
|
|
||||||
shared.logger.info(`Founded data for ${info.name}`);
|
shared.logger.info(`Founded data for ${info.name}`);
|
||||||
|
@ -63,41 +63,86 @@ module.exports.getGameInfo = async function (url) {
|
||||||
//#region Private methods
|
//#region Private methods
|
||||||
/**
|
/**
|
||||||
* @private
|
* @private
|
||||||
* Extracts all the possible informations from the title, including the prefixes.
|
* Parse the game prefixes obtaining the engine used,
|
||||||
|
* the advancement status and if the game is actually a game or a mod.
|
||||||
|
* @param {cheerio.Cheerio} body Page `body` selector
|
||||||
|
* @returns {Object} Dictionary of values
|
||||||
|
*/
|
||||||
|
function parseGamePrefixes(body) {
|
||||||
|
shared.logger.trace("Parsing prefixes...");
|
||||||
|
|
||||||
|
// Local variables
|
||||||
|
let mod = false,
|
||||||
|
engine = null,
|
||||||
|
status = null;
|
||||||
|
|
||||||
|
// Obtain the title prefixes
|
||||||
|
const prefixeElements = body.find(f95Selector.GT_TITLE_PREFIXES);
|
||||||
|
|
||||||
|
prefixeElements.each(function parseGamePrefix(idx, el) {
|
||||||
|
// Obtain the prefix text
|
||||||
|
let prefix = cheerio.load(el).text().trim();
|
||||||
|
|
||||||
|
// Remove the square brackets
|
||||||
|
prefix = prefix.replace("[", "").replace("]", "");
|
||||||
|
|
||||||
|
// Check what the prefix indicates
|
||||||
|
if (isEngine(prefix)) engine = prefix;
|
||||||
|
else if (isStatus(prefix)) status = prefix;
|
||||||
|
else if (isMod(prefix)) mod = true;
|
||||||
|
});
|
||||||
|
|
||||||
|
// If the status is not set, then the game is in development (Ongoing)
|
||||||
|
if (!status) status = "Ongoing";
|
||||||
|
|
||||||
|
return {
|
||||||
|
engine,
|
||||||
|
status,
|
||||||
|
mod
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @private
|
||||||
|
* Extracts all the possible informations from the title.
|
||||||
* @param {cheerio.Cheerio} body Page `body` selector
|
* @param {cheerio.Cheerio} body Page `body` selector
|
||||||
* @returns {Object} Dictionary of values
|
* @returns {Object} Dictionary of values
|
||||||
*/
|
*/
|
||||||
function extractInfoFromTitle(body) {
|
function extractInfoFromTitle(body) {
|
||||||
|
shared.logger.trace("Extracting information from title...");
|
||||||
const title = body
|
const title = body
|
||||||
.find(f95Selector.GT_TITLE)
|
.find(f95Selector.GT_TITLE)
|
||||||
.text()
|
.text()
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
// From the title we can extract: Name, author and version
|
// From the title we can extract: Name, author and version
|
||||||
// TITLE [VERSION] [AUTHOR]
|
// [PREFIXES] TITLE [VERSION] [AUTHOR]
|
||||||
const matches = title.match(/\[(.*?)\]/g);
|
const matches = title.match(/\[(.*?)\]/g);
|
||||||
const endIndex = title.indexOf("["); // The open bracket of the version
|
|
||||||
const name = title.substring(0, endIndex).trim();
|
|
||||||
const version = matches[0].trim();
|
|
||||||
const author = matches[1].trim();
|
|
||||||
|
|
||||||
// Parse the title prefixes
|
// Get the title name
|
||||||
const prefixeElements = body.find(f95Selector.GT_TITLE_PREFIXES);
|
let name = title;
|
||||||
let mod = false, engine = null, status = null;
|
matches.forEach(function replaceElementsInTitle(e) {
|
||||||
prefixeElements.each(function parseGamePrefixes(el) {
|
name = name.replace(e, "");
|
||||||
const prefix = el.text().trim();
|
|
||||||
if(isEngine(prefix)) engine = prefix;
|
|
||||||
else if(isStatus(prefix)) status = prefix;
|
|
||||||
else if (isMod(prefix)) mod = true;
|
|
||||||
});
|
});
|
||||||
|
name = name.trim();
|
||||||
|
|
||||||
|
// The regex [[\]]+ removes the square brackets
|
||||||
|
|
||||||
|
// The version is the penultimate element.
|
||||||
|
// If there are fewer than 2 matches, then the title
|
||||||
|
// is malformed and only the author is fetched
|
||||||
|
// (usually the author is always present)
|
||||||
|
let version = null;
|
||||||
|
if (matches.length >= 2) version = matches[matches.length - 2].replace(/[[\]]+/g, "").trim();
|
||||||
|
else shared.logger.trace(`Malformed title: ${title}`);
|
||||||
|
|
||||||
|
// Last element
|
||||||
|
const author = matches[matches.length - 1].replace(/[[\]]+/g, "").trim();
|
||||||
|
|
||||||
return {
|
return {
|
||||||
name,
|
name,
|
||||||
version,
|
version,
|
||||||
author,
|
author,
|
||||||
engine,
|
|
||||||
status,
|
|
||||||
mod
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,32 +153,49 @@ function extractInfoFromTitle(body) {
|
||||||
* @returns {String[]} List of tags
|
* @returns {String[]} List of tags
|
||||||
*/
|
*/
|
||||||
function extractTags(body) {
|
function extractTags(body) {
|
||||||
|
shared.logger.trace("Extracting tags...");
|
||||||
|
|
||||||
// Get the game tags
|
// Get the game tags
|
||||||
const tagResults = body.find(f95Selector.GT_TAGS);
|
const tagResults = body.find(f95Selector.GT_TAGS);
|
||||||
return tagResults.map((idx, el) => {
|
return tagResults.map(function parseGameTags(idx, el) {
|
||||||
return el.text().trim();
|
return cheerio.load(el).text().trim();
|
||||||
}).get();
|
}).get();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @private
|
* @private
|
||||||
* Extracts the name of the game, its author and its current version from the title of the page.
|
* Gets the URL of the image used as a preview.
|
||||||
* @param {cheerio.Cheerio} mainPost Selector of the main post
|
* @param {cheerio.Cheerio} body Page `body` selector
|
||||||
* @returns {Object} Dictionary of values
|
* @returns {String} URL of the image
|
||||||
*/
|
*/
|
||||||
function extractInfoFromMainPost(mainPost) {
|
function extractPreviewSource(body) {
|
||||||
// Get the preview image
|
shared.logger.trace("Extracting image preview source...");
|
||||||
const previewElement = mainPost.find(f95Selector.GT_IMAGES);
|
const image = body.find(f95Selector.GT_IMAGES);
|
||||||
const previewSource = previewElement ? previewElement.first().attr("src") : null;
|
|
||||||
|
|
||||||
// Get the latest changelog
|
// The "src" attribute is only populated later, at render time,
|
||||||
const changelogElement = mainPost.find(f95Selector.GT_LAST_CHANGELOG);
|
// we need the "static" src value saved in the attribute "data-src"
|
||||||
const changelog = changelogElement ? changelogElement.text().trim() : null;
|
const source = image ? image.attr("data-src") : null;
|
||||||
|
return source;
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
/**
|
||||||
previewSource,
|
* @private
|
||||||
changelog
|
* Gets the changelog of the latest version.
|
||||||
};
|
* @param {cheerio.Cheerio} mainPost main post selector
|
||||||
|
* @returns {String} Changelog of the last version or `null` if no changelog is fetched
|
||||||
|
*/
|
||||||
|
function extractChangelog(mainPost) {
|
||||||
|
shared.logger.trace("Extracting last changelog...");
|
||||||
|
|
||||||
|
// Obtain changelog
|
||||||
|
let changelog = mainPost.find(f95Selector.GT_LAST_CHANGELOG).text().trim();
|
||||||
|
|
||||||
|
// Clean changelog
|
||||||
|
changelog = changelog.replace("Spoiler", "");
|
||||||
|
changelog = changelog.replace(/\n+/g, "\n");
|
||||||
|
|
||||||
|
// Return changelog
|
||||||
|
return changelog ? changelog : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -144,7 +206,9 @@ function extractInfoFromMainPost(mainPost) {
|
||||||
* @returns {Object} Dictionary of information
|
* @returns {Object} Dictionary of information
|
||||||
*/
|
*/
|
||||||
function parseMainPostText(text) {
|
function parseMainPostText(text) {
|
||||||
const dataPairs = {};
|
shared.logger.trace("Parsing main post raw text...");
|
||||||
|
|
||||||
|
const data = {};
|
||||||
|
|
||||||
// The information searched in the game post are one per line
|
// The information searched in the game post are one per line
|
||||||
const splittedText = text.split("\n");
|
const splittedText = text.split("\n");
|
||||||
|
@ -157,28 +221,80 @@ function parseMainPostText(text) {
|
||||||
const value = splitted[1].trim();
|
const value = splitted[1].trim();
|
||||||
|
|
||||||
// Add pair to the dict if valid
|
// Add pair to the dict if valid
|
||||||
if (value !== "") dataPairs[key] = value;
|
if (value !== "") data[key] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
return dataPairs;
|
// Parse the standard pairs
|
||||||
|
const parsedDict = {};
|
||||||
|
|
||||||
|
// Check if the game is censored
|
||||||
|
if (data.CENSORED) {
|
||||||
|
const censored = data.CENSORED.toUpperCase() === "NO" ? false : true;
|
||||||
|
parsedDict["Censored"] = censored;
|
||||||
|
delete data.CENSORED;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Last update of the main post
|
||||||
|
if (data.UPDATED) {
|
||||||
|
parsedDict["LastUpdate"] = new Date(data.UPDATED);
|
||||||
|
delete data.UPDATED;
|
||||||
|
}
|
||||||
|
else if (data.THREAD_UPDATED) {
|
||||||
|
parsedDict["LastUpdate"] = new Date(data.THREAD_UPDATED);
|
||||||
|
delete data.THREAD_UPDATED;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the supported OS
|
||||||
|
if (data.OS) {
|
||||||
|
const listOS = [];
|
||||||
|
|
||||||
|
// Usually the string is something like "Windows, Linux, Mac"
|
||||||
|
const splitted = data.OS.split(",");
|
||||||
|
splitted.forEach(function (os) {
|
||||||
|
listOS.push(os.trim());
|
||||||
|
});
|
||||||
|
|
||||||
|
parsedDict["SupportedOS"] = listOS;
|
||||||
|
delete data.OS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rename the key for the language
|
||||||
|
if (data.LANGUAGE) {
|
||||||
|
parsedDict["Language"] = data.LANGUAGE;
|
||||||
|
delete data.LANGUAGE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// What remains is added to a sub dictionary
|
||||||
|
parsedDict["Various"] = data;
|
||||||
|
|
||||||
|
return parsedDict;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @private
|
* @private
|
||||||
* Extracts and processes the JSON-LD values found at the bottom of the page.
|
* Extracts and processes the JSON-LD values found at the bottom of the page.
|
||||||
* @param {cheerio.Cheerio} body Page `body` selector
|
* @param {cheerio.Cheerio} body Page `body` selector
|
||||||
* @returns ???
|
* @returns {Object} JSON-LD or `null` if no valid JSON is found
|
||||||
*/
|
*/
|
||||||
function extractStructuredData(body) {
|
function extractStructuredData(body) {
|
||||||
const structuredDataElements = body.find("...");
|
shared.logger.trace("Extracting JSON-LD data...");
|
||||||
for (const el in structuredDataElements) {
|
const structuredDataElements = body.find(f95Selector.GT_JSONLD);
|
||||||
for (const child in structuredDataElements[el].children) {
|
const json = structuredDataElements.map(function parseScriptTag(idx, el) {
|
||||||
const data = structuredDataElements[el].children[child].data;
|
// Get the element HTML
|
||||||
console.log(data);
|
const html = cheerio.load(el).html().trim();
|
||||||
// TODO: The @type should be "Book"
|
|
||||||
// TODO: Test here
|
// Obtain the JSON-LD
|
||||||
}
|
const data = html
|
||||||
}
|
.replace("<script type=\"application/ld+json\">", "")
|
||||||
|
.replace("</script>", "");
|
||||||
|
|
||||||
|
// Convert the string to an object
|
||||||
|
const json = JSON.parse(data);
|
||||||
|
|
||||||
|
// Return only the data of the game
|
||||||
|
if (json["@type"] === "Book") return json;
|
||||||
|
}).get();
|
||||||
|
return json[0] ? json[0] : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -190,6 +306,7 @@ function extractStructuredData(body) {
|
||||||
* @returns {Promise<String>} Game description
|
* @returns {Promise<String>} Game description
|
||||||
*/
|
*/
|
||||||
function getOverview(text, mod) {
|
function getOverview(text, mod) {
|
||||||
|
shared.logger.trace("Extracting game overview...");
|
||||||
// Get overview (different parsing for game and mod)
|
// Get overview (different parsing for game and mod)
|
||||||
const overviewEndIndex = mod ? text.indexOf("Updated") : text.indexOf("Thread Updated");
|
const overviewEndIndex = mod ? text.indexOf("Updated") : text.indexOf("Thread Updated");
|
||||||
return text.substring(0, overviewEndIndex).replace("Overview:\n", "").trim();
|
return text.substring(0, overviewEndIndex).replace("Overview:\n", "").trim();
|
||||||
|
@ -235,6 +352,9 @@ function isMod(prefix) {
|
||||||
* @returns {String[]}
|
* @returns {String[]}
|
||||||
*/
|
*/
|
||||||
function toUpperCaseArray(a) {
|
function toUpperCaseArray(a) {
|
||||||
|
// If the array is empty, return
|
||||||
|
if(a.length === 0) return [];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Makes a string uppercase.
|
* Makes a string uppercase.
|
||||||
* @param {String} s
|
* @param {String} s
|
||||||
|
|
|
@ -57,7 +57,7 @@ module.exports.searchMod = async function (name) {
|
||||||
* @return {Promise<String[]>} List of URLs
|
* @return {Promise<String[]>} List of URLs
|
||||||
*/
|
*/
|
||||||
async function fetchResultURLs(url) {
|
async function fetchResultURLs(url) {
|
||||||
shared.logger.info(`Fetching ${url}...`);
|
shared.logger.trace(`Fetching ${url}...`);
|
||||||
|
|
||||||
// Fetch HTML and prepare Cheerio
|
// Fetch HTML and prepare Cheerio
|
||||||
const html = await fetchHTML(url);
|
const html = await fetchHTML(url);
|
||||||
|
@ -82,6 +82,8 @@ async function fetchResultURLs(url) {
|
||||||
* @returns {String} URL to thread
|
* @returns {String} URL to thread
|
||||||
*/
|
*/
|
||||||
function extractLinkFromResult(selector) {
|
function extractLinkFromResult(selector) {
|
||||||
|
shared.logger.trace("Extracting thread link from result...");
|
||||||
|
|
||||||
const partialLink = selector
|
const partialLink = selector
|
||||||
.find(f95Selector.GS_RESULT_THREAD_TITLE)
|
.find(f95Selector.GS_RESULT_THREAD_TITLE)
|
||||||
.attr("href")
|
.attr("href")
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
// Core modules
|
// Public modules from npm
|
||||||
const { join } = require("path");
|
|
||||||
|
|
||||||
const log4js = require("log4js");
|
const log4js = require("log4js");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -20,37 +18,16 @@ class Shared {
|
||||||
* @type Boolean
|
* @type Boolean
|
||||||
*/
|
*/
|
||||||
static #_isLogged = false;
|
static #_isLogged = false;
|
||||||
/**
|
|
||||||
* List of cookies obtained from the F95Zone platform.
|
|
||||||
* @type Object[]
|
|
||||||
*/
|
|
||||||
static #_cookies = null;
|
|
||||||
/**
|
/**
|
||||||
* List of possible game engines used for development.
|
* List of possible game engines used for development.
|
||||||
* @type String[]
|
* @type String[]
|
||||||
*/
|
*/
|
||||||
static #_engines = null;
|
static #_engines = ["ADRIFT", "Flash", "HTML", "Java", "Others", "QSP", "RAGS", "RPGM", "Ren'Py", "Tads", "Unity", "Unreal Engine", "WebGL", "Wolf RPG"];
|
||||||
/**
|
/**
|
||||||
* List of possible development statuses that a game can assume.
|
* List of possible development statuses that a game can assume.
|
||||||
* @type String[]
|
* @type String[]
|
||||||
*/
|
*/
|
||||||
static #_statuses = null;
|
static #_statuses = ["Completed", "Onhold", "Abandoned"];
|
||||||
/**
|
|
||||||
* Wait instruction for the browser created by puppeteer.
|
|
||||||
* @type String
|
|
||||||
*/
|
|
||||||
static WAIT_STATEMENT = "domcontentloaded";
|
|
||||||
/**
|
|
||||||
* Path to the directory to save the cache generated by the API.
|
|
||||||
* @type String
|
|
||||||
*/
|
|
||||||
static #_cacheDir = "./f95cache";
|
|
||||||
/**
|
|
||||||
* If true, it opens a new browser for each request to
|
|
||||||
* the F95Zone platform, otherwise it reuses the same.
|
|
||||||
* @type Boolean
|
|
||||||
*/
|
|
||||||
static #_isolation = false;
|
|
||||||
/**
|
/**
|
||||||
* Logger object used to write to both file and console.
|
* Logger object used to write to both file and console.
|
||||||
* @type log4js.Logger
|
* @type log4js.Logger
|
||||||
|
@ -73,13 +50,6 @@ class Shared {
|
||||||
static get isLogged() {
|
static get isLogged() {
|
||||||
return this.#_isLogged;
|
return this.#_isLogged;
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* List of cookies obtained from the F95Zone platform.
|
|
||||||
* @returns {Object[]}
|
|
||||||
*/
|
|
||||||
static get cookies() {
|
|
||||||
return this.#_cookies;
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* List of possible game engines used for development.
|
* List of possible game engines used for development.
|
||||||
* @returns {String[]}
|
* @returns {String[]}
|
||||||
|
@ -94,42 +64,6 @@ class Shared {
|
||||||
static get statuses() {
|
static get statuses() {
|
||||||
return this.#_statuses;
|
return this.#_statuses;
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* Directory to save the API cache.
|
|
||||||
* @returns {String}
|
|
||||||
*/
|
|
||||||
static get cacheDir() {
|
|
||||||
return this.#_cacheDir;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Path to the F95 platform cache.
|
|
||||||
* @returns {String}
|
|
||||||
*/
|
|
||||||
static get cookiesCachePath() {
|
|
||||||
return join(this.#_cacheDir, "cookies.json");
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Path to the game engine cache.
|
|
||||||
* @returns {String}
|
|
||||||
*/
|
|
||||||
static get enginesCachePath() {
|
|
||||||
return join(this.#_cacheDir, "engines.json");
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Path to the cache of possible game states.
|
|
||||||
* @returns {String}
|
|
||||||
*/
|
|
||||||
static get statusesCachePath() {
|
|
||||||
return join(this.#_cacheDir, "statuses.json");
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* If true, it opens a new browser for each request
|
|
||||||
* to the F95Zone platform, otherwise it reuses the same.
|
|
||||||
* @returns {Boolean}
|
|
||||||
*/
|
|
||||||
static get isolation() {
|
|
||||||
return this.#_isolation;
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* Logger object used to write to both file and console.
|
* Logger object used to write to both file and console.
|
||||||
* @returns {log4js.Logger}
|
* @returns {log4js.Logger}
|
||||||
|
@ -140,10 +74,6 @@ class Shared {
|
||||||
//#endregion Getters
|
//#endregion Getters
|
||||||
|
|
||||||
//#region Setters
|
//#region Setters
|
||||||
static set cookies(val) {
|
|
||||||
this.#_cookies = val;
|
|
||||||
}
|
|
||||||
|
|
||||||
static set engines(val) {
|
static set engines(val) {
|
||||||
this.#_engines = val;
|
this.#_engines = val;
|
||||||
}
|
}
|
||||||
|
@ -152,10 +82,6 @@ class Shared {
|
||||||
this.#_statuses = val;
|
this.#_statuses = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
static set cacheDir(val) {
|
|
||||||
this.#_cacheDir = val;
|
|
||||||
}
|
|
||||||
|
|
||||||
static set debug(val) {
|
static set debug(val) {
|
||||||
this.#_debug = val;
|
this.#_debug = val;
|
||||||
}
|
}
|
||||||
|
@ -163,10 +89,6 @@ class Shared {
|
||||||
static set isLogged(val) {
|
static set isLogged(val) {
|
||||||
this.#_isLogged = val;
|
this.#_isLogged = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
static set isolation(val) {
|
|
||||||
this.#_isolation = val;
|
|
||||||
}
|
|
||||||
//#endregion Setters
|
//#endregion Setters
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,27 +12,45 @@ const networkHelper = require("../app/scripts/network-helper.js");
|
||||||
// Configure the .env reader
|
// Configure the .env reader
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
// Search for Kingdom Of Deception data
|
// Login
|
||||||
searchKOD();
|
auth().then(async function searchGames(result) {
|
||||||
|
if(!result) return;
|
||||||
|
|
||||||
async function searchKOD() {
|
// Search for Kingdom Of Deception data
|
||||||
|
await search("kingdom of deception");
|
||||||
|
|
||||||
|
// Search for Perverted Education data
|
||||||
|
await search("perverted education");
|
||||||
|
|
||||||
|
// Search for Corrupted Kingdoms data
|
||||||
|
await search("corrupted kingdoms");
|
||||||
|
|
||||||
|
// Search for Summertime Saga data
|
||||||
|
await search("summertime saga");
|
||||||
|
});
|
||||||
|
|
||||||
|
async function auth() {
|
||||||
console.log("Token fetch...");
|
console.log("Token fetch...");
|
||||||
const creds = new Credentials(process.env.F95_USERNAME, process.env.F95_PASSWORD);
|
const creds = new Credentials(process.env.F95_USERNAME, process.env.F95_PASSWORD);
|
||||||
await creds.fetchToken();
|
await creds.fetchToken();
|
||||||
console.log(`Token obtained: ${creds.token}`);
|
console.log(`Token obtained: ${creds.token}`);
|
||||||
|
|
||||||
console.log("Authenticating...");
|
console.log("Authenticating...");
|
||||||
const authenticated = await networkHelper.autenticate(creds);
|
const result = await networkHelper.autenticate(creds);
|
||||||
console.log(`Authentication result: ${authenticated}`);
|
console.log(`Authentication result: ${result.message}`);
|
||||||
|
|
||||||
console.log("Searching KOD...");
|
return result.success;
|
||||||
const urls = await searcher.searchGame("kingdom of deception", creds);
|
}
|
||||||
|
|
||||||
|
async function search(gamename) {
|
||||||
|
console.log(`Searching '${gamename}'...`);
|
||||||
|
const urls = await searcher.searchGame(gamename);
|
||||||
console.log(`Found: ${urls}`);
|
console.log(`Found: ${urls}`);
|
||||||
|
|
||||||
console.log("Scraping data...");
|
console.log("Scraping data...");
|
||||||
for (const url of urls) {
|
for (const url of urls) {
|
||||||
const gamedata = await scraper.getGameInfo(url);
|
const gamedata = await scraper.getGameInfo(url);
|
||||||
console.log(gamedata);
|
console.log(`Found ${gamedata.name} (${gamedata.version}) by ${gamedata.author}`);
|
||||||
}
|
}
|
||||||
console.log("Scraping completed!");
|
console.log("Scraping completed!");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue