Better logging, completed game scraping, remove unused values
parent ecfd1784b1
commit 1391f28ad1
@@ -23,6 +23,22 @@ class GameInfo {
* @type String
*/
this.overview = null;
/**
* Game language.
* @type String
*/
this.language = null;
/**
* List of supported OS.
* @type String[]
*/
this.supportedOS = [];
/**
* Specify whether the game has censorship
* measures regarding NSFW scenes.
* @type Boolean
*/
this.censored = null;
/**
* List of tags associated with the game
* @type String[]

@@ -86,6 +102,9 @@ class GameInfo {
author: this.author,
url: this.url,
overview: this.overview,
language: this.language,
supportedOS: this.supportedOS,
censored: this.censored,
engine: this.engine,
status: this.status,
previewSrc: this.previewSrc,
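The two hunks above add overview, language, supportedOS and censored to GameInfo and include them in toJSON(). A minimal usage sketch (the require path and the field values are assumptions for illustration, not part of this commit):

const GameInfo = require("./app/scripts/classes/game-info.js"); // path assumed

const info = new GameInfo();
info.overview = "Short description of the game";
info.language = "English";
info.supportedOS = ["Windows", "Linux", "Mac"];
info.censored = false;

// JSON.stringify() picks up toJSON(), so the new fields survive serialization.
console.log(JSON.stringify(info));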
@@ -1,31 +1,28 @@
module.exports = Object.freeze({
AVATAR_INFO: "span.avatar",
AVATAR_PIC: "a[href=\"/account/\"] > span.avatar > img[class^=\"avatar\"]",
ENGINE_ID_SELECTOR: "div[id^=\"btn-prefix_1_\"]>span",
FILTER_THREADS_BUTTON: "button[class=\"button--primary button\"]",
GT_IMAGES: "img[src^=\"https://attachments.f95zone.to\"]",
BD_ENGINE_ID_SELECTOR: "div[id^=\"btn-prefix_1_\"]>span",
BD_STATUS_ID_SELECTOR: "div[id^=\"btn-prefix_4_\"]>span",
GT_IMAGES: "img:not([title])[data-src^=\"https://attachments.f95zone.to\"][data-url=\"\"]",
GT_TAGS: "a.tagItem",
GT_TITLE: "h1.p-title-value",
GT_TITLE_PREFIXES: "h1.p-title-value > a.labelLink > span[dir=\"auto\"]",
LOGIN_BUTTON: "button.button--icon--login",
LOGIN_MESSAGE_ERROR: "div.blockMessage.blockMessage--error.blockMessage--iconic",
ONLY_GAMES_THREAD_OPTION: "select[name=\"nodes[]\"] > option[value=\"2\"]",
PASSWORD_INPUT: "input[name=\"password\"]",
SEARCH_BUTTON: "form.block > * button.button--icon--search",
SEARCH_FORM_TEXTBOX: "input[name=\"keywords\"][type=\"search\"]",
SEARCH_ONLY_GAMES_OPTION: "select[name=\"c[nodes][]\"] > option[value=\"1\"]",
STATUS_ID_SELECTOR: "div[id^=\"btn-prefix_4_\"]>span",
GS_POSTS: "article.message-body:first-child > div.bbWrapper:first-of-type",
GS_RESULT_THREAD_TITLE: "h3.contentRow-title > a",
TITLE_ONLY_CHECKBOX: "form.block > * input[name=\"c[title_only]\"]",
WT_UNREAD_THREAD_CHECKBOX: "input[type=\"checkbox\"][name=\"unread\"]",
USERNAME_ELEMENT: "a[href=\"/account/\"] > span.p-navgroup-linkText",
USERNAME_INPUT: "input[name=\"login\"]",
GT_LAST_CHANGELOG: "div.bbCodeBlock-content > div:first-of-type",
GT_JSONLD: "script[type=\"application/ld+json\"]",
WT_FILTER_POPUP_BUTTON: "a.filterBar-menuTrigger",
WT_NEXT_PAGE: "a.pageNav-jump--next",
WT_URLS: "a[href^=\"/threads/\"][data-tp-primary]",
DOWNLOAD_LINKS_CONTAINER: "span[style=\"font-size: 18px\"]",
WT_UNREAD_THREAD_CHECKBOX: "input[type=\"checkbox\"][name=\"unread\"]",
GS_POSTS: "article.message-body:first-child > div.bbWrapper:first-of-type",
GS_RESULT_THREAD_TITLE: "h3.contentRow-title > a",
GS_RESULT_BODY: "div.contentRow-main",
GS_MEMBERSHIP: "li > a:not(.username)",
GT_LAST_CHANGELOG: "div.bbCodeBlock-content > div:first-of-type",
GET_REQUEST_TOKEN: "input[name=\"_xfToken\"]",

LOGIN_BUTTON: "button.button--icon--login",
LOGIN_MESSAGE_ERROR: "div.blockMessage.blockMessage--error.blockMessage--iconic",
PASSWORD_INPUT: "input[name=\"password\"]",
USERNAME_ELEMENT: "a[href=\"/account/\"] > span.p-navgroup-linkText",
USERNAME_INPUT: "input[name=\"login\"]",
AVATAR_INFO: "span.avatar",
AVATAR_PIC: "a[href=\"/account/\"] > span.avatar > img[class^=\"avatar\"]",
FILTER_THREADS_BUTTON: "button[class=\"button--primary button\"]",
});
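These constants are plain CSS selectors meant to be passed to cheerio; the BD_* pair feeds the new fetchPlatformData() and the GS_* entries serve the game search. A minimal sketch of how one of them is consumed (the HTML snippet is invented for illustration):

const cheerio = require("cheerio");
const f95selector = require("./app/scripts/constants/css-selector.js"); // path assumed

const html = "<body><a class=\"tagItem\">3d game</a><a class=\"tagItem\">animated</a></body>";
const $ = cheerio.load(html);

// GT_TAGS matches every <a class="tagItem"> element on a thread page.
const tags = $("body").find(f95selector.GT_TAGS)
    .map((idx, el) => $(el).text().trim())
    .get();
console.log(tags); // ["3d game", "animated"]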
@@ -13,14 +13,24 @@ const tough = require("tough-cookie");
// Modules from file
const shared = require("./shared.js");
const f95url = require("./constants/url.js");
const f95selector = require("./constants/css-selector.js");
const LoginResult = require("./classes/login-result.js");

// Global variables
const userAgent =
"Mozilla/5.0 (X11; Linux x86_64)" +
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36";
const userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) " +
"AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Safari/605.1.15";
axiosCookieJarSupport(axios);
const cookieJar = new tough.CookieJar();

const commonConfig = {
headers: {
"User-Agent": userAgent,
"Connection": "keep-alive"
},
withCredentials: true,
jar: cookieJar // Used to store the token in the PC
};

/**
* @protected
* Gets the HTML code of a page.
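For context, commonConfig works because axios-cookiejar-support patches axios so that the jar option is honoured: every request made with that config reads and writes the same tough-cookie jar, which is how the login cookies persist between calls. A stripped-down sketch of the same wiring (the URLs are placeholders):

const axios = require("axios");
const axiosCookieJarSupport = require("axios-cookiejar-support").default;
const tough = require("tough-cookie");

axiosCookieJarSupport(axios);            // enables the `jar` option on axios
const cookieJar = new tough.CookieJar(); // cookies persist across requests

const config = { jar: cookieJar, withCredentials: true };

// Both requests share the session cookies stored in `cookieJar`.
axios.get("https://example.com/login", config)
    .then(() => axios.get("https://example.com/profile", config))
    .catch((e) => console.error(e.message));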
@@ -28,19 +38,13 @@ const cookieJar = new tough.CookieJar();
* @returns {Promise<String>} HTML code or `null` if an error arises
*/
module.exports.fetchHTML = async function (url) {
try {
const response = await axios.get(url, {
headers: {
"User-Agent": userAgent
},
withCredentials: true,
jar: cookieJar
});
return response.data;
} catch (e) {
shared.logger.error(`Error ${e.message} occurred while trying to fetch ${url}`);
// Fetch the response of the platform
const response = await exports.fetchGETResponse(url);
if (!response) {
shared.logger.warn(`Unable to fetch HTML for ${url}`);
return null;
}
return response.data;
};

/**
@@ -49,12 +53,21 @@ module.exports.fetchHTML = async function (url) {
* and token obtained previously. Save cookies on your
* device after authentication.
* @param {Credentials} credentials Platform access credentials
* @returns {Promise<Boolean>} Result of the operation
* @returns {Promise<LoginResult>} Result of the operation
*/
module.exports.autenticate = async function (credentials) {
shared.logger.info(`Authenticating with user ${credentials.username}`);
if (!credentials.token) throw new Error(`Invalid token for auth: ${credentials.token}`);

// If the user is already logged in, return
if(shared.isLogged) {
shared.logger.warn(`${credentials.username} already authenticated`);
return new LoginResult(true, "Already authenticated");
}

// Secure the URL
const secureURL = exports.enforceHttpsUrl(f95url.F95_LOGIN_URL);

// Prepare the parameters to send to the platform to authenticate
const params = new URLSearchParams();
params.append("login", credentials.username);

@@ -67,22 +80,23 @@ module.exports.autenticate = async function (credentials) {
params.append("website_code", "");
params.append("_xfToken", credentials.token);

const config = {
headers: {
"User-Agent": userAgent,
"Content-Type": "application/x-www-form-urlencoded",
"Connection": "keep-alive"
},
withCredentials: true,
jar: cookieJar // Retrieve the stored cookies! What a pain to understand that this is a MUST!
};

try {
await axios.post(f95url.F95_LOGIN_URL, params, config);
return true;
// Try to log in
const response = await axios.post(secureURL, params, commonConfig);

// Parse the response HTML
const $ = cheerio.load(response.data);

// Get the error message (if any) and remove the new line chars
const errorMessage = $("body").find(f95selector.LOGIN_MESSAGE_ERROR).text().replace(/\n/g, "");

// Return the result of the authentication
shared.isLogged = errorMessage === "";
if (errorMessage === "") return new LoginResult(true, "Authentication successful");
else return new LoginResult(false, errorMessage);
} catch (e) {
shared.logger.error(`Error ${e.message} occurred while authenticating to ${f95url.F95_LOGIN_URL}`);
return false;
shared.logger.error(`Error ${e.message} occurred while authenticating to ${secureURL}`);
return new LoginResult(false, `Error ${e.message} while authenticating`);
}
};
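Since autenticate() now resolves to a LoginResult instead of a bare boolean, callers branch on its success and message fields (the same pattern used in the test script at the bottom of this commit). A short sketch, assuming a Credentials instance whose token has already been fetched:

const creds = new Credentials("username", "password"); // placeholder credentials
await creds.fetchToken();

const result = await networkHelper.autenticate(creds);
if (result.success) console.log(`Logged in: ${result.message}`);
else console.error(`Login failed: ${result.message}`);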
@@ -91,25 +105,63 @@ module.exports.autenticate = async function (credentials) {
* @returns {Promise<String>} Token or `null` if an error arises
*/
module.exports.getF95Token = async function() {
try {
const config = {
headers: {
"User-Agent": userAgent,
"Connection": "keep-alive"
},
withCredentials: true,
jar: cookieJar // Used to store the token in the PC
};

// Fetch the response of the platform
const response = await axios.get(f95url.F95_LOGIN_URL, config);
const response = await exports.fetchGETResponse(f95url.F95_LOGIN_URL);
if (!response) {
shared.logger.warn("Unable to get the token for the session");
return null;
}

// The response is an HTML page; we need to find the <input> with name "_xfToken"
const $ = cheerio.load(response.data);
const token = $("body").find("input[name='_xfToken']").attr("value");
const token = $("body").find(f95selector.GET_REQUEST_TOKEN).attr("value");
return token;
};

/**
* @protected
* Gets the basic data used for game data processing
* (such as graphics engines and progress statuses)
* @deprecated
*/
module.exports.fetchPlatformData = async function() {
// Fetch the response of the platform
const response = await exports.fetchGETResponse(f95url.F95_LATEST_UPDATES);
if (!response) {
shared.logger.warn("Unable to fetch platform data");
return;
}

// The response is an HTML page; we need to find
// the base data, used when scraping the games
const $ = cheerio.load(response.data);

// Extract the elements
const engineElements = $("body").find(f95selector.BD_ENGINE_ID_SELECTOR);
const statusesElements = $("body").find(f95selector.BD_STATUS_ID_SELECTOR);

// Extract the raw text
engineElements.each(function extractEngineNames(idx, el) {
const engine = cheerio.load(el).text().trim();
shared.engines.push(engine);
});

statusesElements.each(function extractStatusNames(idx, el) {
const status = cheerio.load(el).text().trim();
shared.statuses.push(status);
});
};

//#region Utility methods
module.exports.fetchGETResponse = async function(url) {
// Secure the URL
const secureURL = exports.enforceHttpsUrl(url);

try {
// Fetch and return the response
return await axios.get(secureURL, commonConfig);
} catch (e) {
shared.logger.error(`Error ${e.message} occurred while trying to fetch F95 token`);
shared.logger.error(`Error ${e.message} occurred while trying to fetch ${secureURL}`);
return null;
}
};
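fetchGETResponse() is now the single entry point for GET requests: it enforces HTTPS, reuses commonConfig (and therefore the shared cookie jar), logs any error, and returns null instead of throwing. A hedged sketch of a typical caller:

// Sketch: fetch a page through the shared helper and guard against null.
const response = await exports.fetchGETResponse(f95url.F95_LATEST_UPDATES);
if (!response) {
    shared.logger.warn("Request failed, nothing to parse");
} else {
    const $ = cheerio.load(response.data); // parse the returned HTML as usual
}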
@@ -121,8 +173,7 @@ module.exports.getF95Token = async function() {
* @returns {String}
*/
module.exports.enforceHttpsUrl = function (url) {
const value = _.isString(url) ? url.replace(/^(https?:)?\/\//, "https://") : null;
return value;
return _.isString(url) ? url.replace(/^(https?:)?\/\//, "https://") : null;
};

/**
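enforceHttpsUrl() only rewrites the scheme and returns null for non-string input; a few illustrative calls (the thread path is made up):

exports.enforceHttpsUrl("http://f95zone.to/threads/1234/"); // "https://f95zone.to/threads/1234/"
exports.enforceHttpsUrl("//f95zone.to/login");               // "https://f95zone.to/login"
exports.enforceHttpsUrl(42);                                  // null (not a string)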
@@ -187,3 +238,4 @@ module.exports.getUrlRedirect = async function (url) {
const response = await ky.head(url);
return response.url;
};
//#endregion Utility methods
@@ -27,33 +27,33 @@ module.exports.getGameInfo = async function (url) {

// Extract data
const titleData = extractInfoFromTitle(body);
console.log(titleData);
const tags = extractTags(body);
console.log(tags);
const mainPostData = extractInfoFromMainPost(mainPost);
console.log(mainPostData);
const prefixesData = parseGamePrefixes(body);
const src = extractPreviewSource(body);
const changelog = extractChangelog(mainPost);
const structuredData = extractStructuredData(body);
const parsedInfos = parseMainPostText(structuredData["description"]);
const overview = getOverview(structuredData["description"], prefixesData.mod);

// Obtain the updated URL
const redirectUrl = await getUrlRedirect(url);

// TODO: Check to change
const parsedInfos = parseMainPostText(mainPost.text());
const overview = getOverview(mainPost.text(), info.isMod);

// Fill in the GameInfo element with the information obtained
const info = new GameInfo();
info.name = titleData.name;
info.author = titleData.author;
info.isMod = titleData.mod;
info.engine = titleData.engine;
info.status = titleData.status;
info.isMod = prefixesData.mod;
info.engine = prefixesData.engine;
info.status = prefixesData.status;
info.tags = tags;
info.url = redirectUrl;
info.language = parsedInfos.Language;
info.overview = overview;
info.lastUpdate = titleData.mod ? parsedInfos.UPDATED : parsedInfos.THREAD_UPDATED;
info.previewSource = mainPostData.previewSource;
info.changelog = mainPostData.changelog;
info.supportedOS = parsedInfos.SupportedOS;
info.censored = parsedInfos.Censored;
info.lastUpdate = parsedInfos.LastUpdate;
info.previewSrc = src;
info.changelog = changelog;
info.version = titleData.version;

shared.logger.info(`Found data for ${info.name}`);
@@ -63,41 +63,86 @@ module.exports.getGameInfo = async function (url) {
//#region Private methods
/**
* @private
* Extracts all the possible information from the title, including the prefixes.
* Parses the game prefixes, obtaining the engine used,
* the advancement status and whether the thread is actually a game or a mod.
* @param {cheerio.Cheerio} body Page `body` selector
* @returns {Object} Dictionary of values
*/
function parseGamePrefixes(body) {
shared.logger.trace("Parsing prefixes...");

// Local variables
let mod = false,
engine = null,
status = null;

// Obtain the title prefixes
const prefixeElements = body.find(f95Selector.GT_TITLE_PREFIXES);

prefixeElements.each(function parseGamePrefix(idx, el) {
// Obtain the prefix text
let prefix = cheerio.load(el).text().trim();

// Remove the square brackets
prefix = prefix.replace("[", "").replace("]", "");

// Check what the prefix indicates
if (isEngine(prefix)) engine = prefix;
else if (isStatus(prefix)) status = prefix;
else if (isMod(prefix)) mod = true;
});

// If the status is not set, then the game is in development (Ongoing)
if (!status) status = "Ongoing";

return {
engine,
status,
mod
};
}

/**
* @private
* Extracts all the possible information from the title.
* @param {cheerio.Cheerio} body Page `body` selector
* @returns {Object} Dictionary of values
*/
function extractInfoFromTitle(body) {
shared.logger.trace("Extracting information from title...");
const title = body
.find(f95Selector.GT_TITLE)
.text()
.trim();

// From the title we can extract: Name, author and version
// TITLE [VERSION] [AUTHOR]
// [PREFIXES] TITLE [VERSION] [AUTHOR]
const matches = title.match(/\[(.*?)\]/g);
const endIndex = title.indexOf("["); // The open bracket of the version
const name = title.substring(0, endIndex).trim();
const version = matches[0].trim();
const author = matches[1].trim();

// Parse the title prefixes
const prefixeElements = body.find(f95Selector.GT_TITLE_PREFIXES);
let mod = false, engine = null, status = null;
prefixeElements.each(function parseGamePrefixes(el) {
const prefix = el.text().trim();
if(isEngine(prefix)) engine = prefix;
else if(isStatus(prefix)) status = prefix;
else if (isMod(prefix)) mod = true;
// Get the title name
let name = title;
matches.forEach(function replaceElementsInTitle(e) {
name = name.replace(e, "");
});
name = name.trim();

// The regex [[\]]+ removes the square brackets

// The version is the penultimate element.
// If there are fewer than 2 matches, the title
// is malformed and only the author is fetched
// (usually the author is always present)
let version = null;
if (matches.length >= 2) version = matches[matches.length - 2].replace(/[[\]]+/g, "").trim();
else shared.logger.trace(`Malformed title: ${title}`);

// Last element
const author = matches[matches.length - 1].replace(/[[\]]+/g, "").trim();

return {
name,
version,
author,
engine,
status,
mod
};
}
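To make the bracket logic above concrete, here is how a hypothetical thread title is decomposed (comments only, values invented for illustration):

// Title: "My Game [v0.5.2] [SomeDev]"
// title.match(/\[(.*?)\]/g)   -> ["[v0.5.2]", "[SomeDev]"]
// name (matches stripped out) -> "My Game"
// version (penultimate match) -> "v0.5.2"
// author (last match)         -> "SomeDev"
// With a single bracket group the version stays null, the title is logged as malformed
// and only the author is extracted.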
@@ -108,32 +153,49 @@ function extractInfoFromTitle(body) {
* @returns {String[]} List of tags
*/
function extractTags(body) {
shared.logger.trace("Extracting tags...");

// Get the game tags
const tagResults = body.find(f95Selector.GT_TAGS);
return tagResults.map((idx, el) => {
return el.text().trim();
return tagResults.map(function parseGameTags(idx, el) {
return cheerio.load(el).text().trim();
}).get();
}

/**
* @private
* Extracts the name of the game, its author and its current version from the title of the page.
* @param {cheerio.Cheerio} mainPost Selector of the main post
* @returns {Object} Dictionary of values
* Gets the URL of the image used as a preview.
* @param {cheerio.Cheerio} body Page `body` selector
* @returns {String} URL of the image
*/
function extractInfoFromMainPost(mainPost) {
// Get the preview image
const previewElement = mainPost.find(f95Selector.GT_IMAGES);
const previewSource = previewElement ? previewElement.first().attr("src") : null;
function extractPreviewSource(body) {
shared.logger.trace("Extracting image preview source...");
const image = body.find(f95Selector.GT_IMAGES);

// Get the latest changelog
const changelogElement = mainPost.find(f95Selector.GT_LAST_CHANGELOG);
const changelog = changelogElement ? changelogElement.text().trim() : null;
// The "src" attribute is only rendered later,
// we need the "static" src value saved in the attribute "data-src"
const source = image ? image.attr("data-src") : null;
return source;
}

return {
previewSource,
changelog
};
/**
* @private
* Gets the changelog of the latest version.
* @param {cheerio.Cheerio} mainPost main post selector
* @returns {String} Changelog of the last version or `null` if no changelog is fetched
*/
function extractChangelog(mainPost) {
shared.logger.trace("Extracting last changelog...");

// Obtain changelog
let changelog = mainPost.find(f95Selector.GT_LAST_CHANGELOG).text().trim();

// Clean changelog
changelog = changelog.replace("Spoiler", "");
changelog = changelog.replace(/\n+/g, "\n");

// Return changelog
return changelog ? changelog : null;
}

/**
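The data-src detail matters because the attachment thumbnails are lazy-loaded: the server-rendered markup carries the static URL in data-src and client-side JavaScript fills in src later, so scraping src would return nothing. A tiny sketch with invented markup:

const $ = cheerio.load(
    "<img data-src=\"https://attachments.f95zone.to/2020/10/preview.jpg\" data-url=\"\">"
);
const image = $("img");
console.log(image.attr("src"));      // undefined: not rendered server-side
console.log(image.attr("data-src")); // "https://attachments.f95zone.to/2020/10/preview.jpg"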
@@ -144,7 +206,9 @@ function extractInfoFromMainPost(mainPost) {
* @returns {Object} Dictionary of information
*/
function parseMainPostText(text) {
const dataPairs = {};
shared.logger.trace("Parsing main post raw text...");

const data = {};

// The information searched for in the game post is listed one entry per line
const splittedText = text.split("\n");
@@ -157,28 +221,80 @@ function parseMainPostText(text) {
const value = splitted[1].trim();

// Add pair to the dict if valid
if (value !== "") dataPairs[key] = value;
if (value !== "") data[key] = value;
}

return dataPairs;
// Parse the standard pairs
const parsedDict = {};

// Check if the game is censored
if (data.CENSORED) {
const censored = data.CENSORED.toUpperCase() === "NO" ? false : true;
parsedDict["Censored"] = censored;
delete data.CENSORED;
}

// Last update of the main post
if (data.UPDATED) {
parsedDict["LastUpdate"] = new Date(data.UPDATED);
delete data.UPDATED;
}
else if (data.THREAD_UPDATED) {
parsedDict["LastUpdate"] = new Date(data.THREAD_UPDATED);
delete data.THREAD_UPDATED;
}

// Parse the supported OS
if (data.OS) {
const listOS = [];

// Usually the string is something like "Windows, Linux, Mac"
const splitted = data.OS.split(",");
splitted.forEach(function (os) {
listOS.push(os.trim());
});

parsedDict["SupportedOS"] = listOS;
delete data.OS;
}

// Rename the key for the language
if (data.LANGUAGE) {
parsedDict["Language"] = data.LANGUAGE;
delete data.LANGUAGE;
}

// What remains is added to a sub dictionary
parsedDict["Various"] = data;

return parsedDict;
}

/**
* @private
* Extracts and processes the JSON-LD values found at the bottom of the page.
* @param {cheerio.Cheerio} body Page `body` selector
* @returns ???
* @returns {Object} JSON-LD or `null` if no valid JSON is found
*/
function extractStructuredData(body) {
const structuredDataElements = body.find("...");
for (const el in structuredDataElements) {
for (const child in structuredDataElements[el].children) {
const data = structuredDataElements[el].children[child].data;
console.log(data);
// TODO: The @type should be "Book"
// TODO: Test here
}
}
shared.logger.trace("Extracting JSON-LD data...");
const structuredDataElements = body.find(f95Selector.GT_JSONLD);
const json = structuredDataElements.map(function parseScriptTag(idx, el) {
// Get the element HTML
const html = cheerio.load(el).html().trim();

// Obtain the JSON-LD
const data = html
.replace("<script type=\"application/ld+json\">", "")
.replace("</script>", "");

// Convert the string to an object
const json = JSON.parse(data);

// Return only the data of the game
if (json["@type"] === "Book") return json;
}).get();
return json[0] ? json[0] : null;
}

/**
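Taken together, parseMainPostText() turns the "Key: value" lines of the main post into a typed dictionary. A hypothetical input/output pair (the key normalisation happens earlier in the function, outside this hunk, and is assumed here to upper-case the labels):

const sample = [
    "Thread Updated: 2020-10-21",
    "Censored: No",
    "OS: Windows, Linux, Mac",
    "Language: English",
    "Developer: SomeDev"
].join("\n");

const parsed = parseMainPostText(sample);
// parsed.Censored    -> false
// parsed.LastUpdate  -> new Date("2020-10-21")
// parsed.SupportedOS -> ["Windows", "Linux", "Mac"]
// parsed.Language    -> "English"
// parsed.Various     -> whatever pairs were not explicitly handled (e.g. the developer)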
@@ -190,6 +306,7 @@ function extractStructuredData(body) {
* @returns {Promise<String>} Game description
*/
function getOverview(text, mod) {
shared.logger.trace("Extracting game overview...");
// Get overview (different parsing for game and mod)
const overviewEndIndex = mod ? text.indexOf("Updated") : text.indexOf("Thread Updated");
return text.substring(0, overviewEndIndex).replace("Overview:\n", "").trim();
@@ -235,6 +352,9 @@ function isMod(prefix) {
* @returns {String[]}
*/
function toUpperCaseArray(a) {
// If the array is empty, return
if(a.length === 0) return [];

/**
* Makes a string uppercase.
* @param {String} s
@@ -57,7 +57,7 @@ module.exports.searchMod = async function (name) {
* @return {Promise<String[]>} List of URLs
*/
async function fetchResultURLs(url) {
shared.logger.info(`Fetching ${url}...`);
shared.logger.trace(`Fetching ${url}...`);

// Fetch HTML and prepare Cheerio
const html = await fetchHTML(url);

@@ -82,6 +82,8 @@ async function fetchResultURLs(url) {
* @returns {String} URL to thread
*/
function extractLinkFromResult(selector) {
shared.logger.trace("Extracting thread link from result...");

const partialLink = selector
.find(f95Selector.GS_RESULT_THREAD_TITLE)
.attr("href")
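The rest of extractLinkFromResult() is cut off by this hunk. The href taken from GS_RESULT_THREAD_TITLE is a partial link (the search results use relative /threads/... anchors), and one common way to complete it is sketched below; this is an assumption for illustration, not necessarily what the function actually does:

const { URL } = require("url");

// Sketch: turn a relative thread link into an absolute one.
function toAbsoluteThreadUrl(partialLink) {
    // "/threads/some-game.12345/" -> "https://f95zone.to/threads/some-game.12345/"
    return new URL(partialLink, "https://f95zone.to").toString();
}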
@@ -1,8 +1,6 @@
"use strict";

// Core modules
const { join } = require("path");

// Public modules from npm
const log4js = require("log4js");

/**

@@ -20,37 +18,16 @@ class Shared {
* @type Boolean
*/
static #_isLogged = false;
/**
* List of cookies obtained from the F95Zone platform.
* @type Object[]
*/
static #_cookies = null;
/**
* List of possible game engines used for development.
* @type String[]
*/
static #_engines = null;
static #_engines = ["ADRIFT", "Flash", "HTML", "Java", "Others", "QSP", "RAGS", "RPGM", "Ren'Py", "Tads", "Unity", "Unreal Engine", "WebGL", "Wolf RPG"];
/**
* List of possible development statuses that a game can assume.
* @type String[]
*/
static #_statuses = null;
/**
* Wait instruction for the browser created by puppeteer.
* @type String
*/
static WAIT_STATEMENT = "domcontentloaded";
/**
* Path to the directory to save the cache generated by the API.
* @type String
*/
static #_cacheDir = "./f95cache";
/**
* If true, it opens a new browser for each request to
* the F95Zone platform, otherwise it reuses the same.
* @type Boolean
*/
static #_isolation = false;
static #_statuses = ["Completed", "Onhold", "Abandoned"];
/**
* Logger object used to write to both file and console.
* @type log4js.Logger

@@ -73,13 +50,6 @@ class Shared {
static get isLogged() {
return this.#_isLogged;
}
/**
* List of cookies obtained from the F95Zone platform.
* @returns {Object[]}
*/
static get cookies() {
return this.#_cookies;
}
/**
* List of possible game engines used for development.
* @returns {String[]}

@@ -94,42 +64,6 @@ class Shared {
static get statuses() {
return this.#_statuses;
}
/**
* Directory to save the API cache.
* @returns {String}
*/
static get cacheDir() {
return this.#_cacheDir;
}
/**
* Path to the F95 platform cache.
* @returns {String}
*/
static get cookiesCachePath() {
return join(this.#_cacheDir, "cookies.json");
}
/**
* Path to the game engine cache.
* @returns {String}
*/
static get enginesCachePath() {
return join(this.#_cacheDir, "engines.json");
}
/**
* Path to the cache of possible game states.
* @returns {String}
*/
static get statusesCachePath() {
return join(this.#_cacheDir, "statuses.json");
}
/**
* If true, it opens a new browser for each request
* to the F95Zone platform, otherwise it reuses the same.
* @returns {Boolean}
*/
static get isolation() {
return this.#_isolation;
}
/**
* Logger object used to write to both file and console.
* @returns {log4js.Logger}

@@ -140,10 +74,6 @@ class Shared {
//#endregion Getters

//#region Setters
static set cookies(val) {
this.#_cookies = val;
}

static set engines(val) {
this.#_engines = val;
}

@@ -152,10 +82,6 @@ class Shared {
this.#_statuses = val;
}

static set cacheDir(val) {
this.#_cacheDir = val;
}

static set debug(val) {
this.#_debug = val;
}

@@ -163,10 +89,6 @@ class Shared {
static set isLogged(val) {
this.#_isLogged = val;
}

static set isolation(val) {
this.#_isolation = val;
}
//#endregion Setters
}
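With the Puppeteer-era members gone (isolation, WAIT_STATEMENT, cookies, cache paths), Shared now mainly carries the login flag, the hard-coded prefix lists and the logger. A short sketch of how the remaining members are used by the other modules in this commit (the require path is assumed):

const shared = require("./app/scripts/shared.js"); // path assumed

shared.logger.level = "trace";                      // one log4js logger for every module
shared.logger.info(`Already logged in: ${shared.isLogged}`);

// The prefix lists now ship with defaults instead of null.
console.log(shared.engines.includes("Ren'Py"));     // true with the default list
console.log(shared.statuses);                       // ["Completed", "Onhold", "Abandoned"]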
@@ -12,27 +12,45 @@ const networkHelper = require("../app/scripts/network-helper.js");
// Configure the .env reader
dotenv.config();

// Search for Kingdom Of Deception data
searchKOD();
// Login
auth().then(async function searchGames(result) {
if(!result) return;

async function searchKOD() {
// Search for Kingdom Of Deception data
await search("kingdom of deception");

// Search for Perverted Education data
await search("perverted education");

// Search for Corrupted Kingdoms data
await search("corrupted kingdoms");

// Search for Summertime Saga data
await search("summertime saga");
});

async function auth() {
console.log("Token fetch...");
const creds = new Credentials(process.env.F95_USERNAME, process.env.F95_PASSWORD);
await creds.fetchToken();
console.log(`Token obtained: ${creds.token}`);

console.log("Authenticating...");
const authenticated = await networkHelper.autenticate(creds);
console.log(`Authentication result: ${authenticated}`);
const result = await networkHelper.autenticate(creds);
console.log(`Authentication result: ${result.message}`);

console.log("Searching KOD...");
const urls = await searcher.searchGame("kingdom of deception", creds);
return result.success;
}

async function search(gamename) {
console.log(`Searching '${gamename}'...`);
const urls = await searcher.searchGame(gamename);
console.log(`Found: ${urls}`);

console.log("Scraping data...");
for (const url of urls) {
const gamedata = await scraper.getGameInfo(url);
console.log(gamedata);
console.log(`Found ${gamedata.name} (${gamedata.version}) by ${gamedata.author}`);
}
console.log("Scraping completed!");
}