Added scraper, needs authentication

pull/44/head
MillenniumEarl 2020-10-31 16:00:26 +01:00
parent 20fea5c315
commit 652fe0d3d6
24 changed files with 1392 additions and 277 deletions

View File

@ -3,7 +3,8 @@
"browser": true,
"commonjs": true,
"es2021": true,
"node": true
"node": true,
"mocha": true
},
"extends": "eslint:recommended",
"parser": "babel-eslint",

View File

@ -4,21 +4,21 @@
const fs = require("fs");
// Modules from file
const shared = require("./scripts/shared.js");
const urlK = require("./scripts/constants/url.js");
const selectorK = require("./scripts/constants/css-selector.js");
const urlHelper = require("./scripts/url-helper.js");
const scraper = require("./scripts/game-scraper.js");
const shared = require("../app/scripts/shared.js");
const urlK = require("../app/scripts/constants/url.js");
const selectorK = require("../app/scripts/constants/css-selector.js");
const urlHelper = require("../app/scripts/url-helper.js");
const scraper = require("../app/scripts/game-scraper.js");
const {
prepareBrowser,
preparePage,
} = require("./scripts/puppeteer-helper.js");
const searcher = require("./scripts/game-searcher.js");
} = require("../app/scripts/puppeteer-helper.js");
const searcher = require("../app/scripts/game-searcher.js");
// Classes from file
const GameInfo = require("./scripts/classes/game-info.js");
const LoginResult = require("./scripts/classes/login-result.js");
const UserData = require("./scripts/classes/user-data.js");
const GameInfo = require("../app/scripts/classes/game-info.js");
const LoginResult = require("../app/scripts/classes/login-result.js");
const UserData = require("../app/scripts/classes/user-data.js");
//#region Export classes
module.exports.GameInfo = GameInfo;

View File

@ -17,7 +17,7 @@ class GameInfo {
* URL to the game's official conversation on the F95Zone portal
* @type String
*/
this.f95url = null;
this.url = null;
/**
* Game description
* @type String
@ -42,7 +42,7 @@ class GameInfo {
* Game description image URL
* @type String
*/
this.previewSource = null;
this.previewSrc = null;
/**
* Game version
* @type String
@ -73,41 +73,33 @@ class GameInfo {
* @type String
*/
this.gameDir = null;
/**
* Information on game file download links,
* including information on hosting platforms
* and operating system supported by the specific link
* @type GameDownload[]
*/
this.downloadInfo = [];
//#endregion Properties
}
/**
* Converts the object to a dictionary used for JSON serialization
* Converts the object to a dictionary used for JSON serialization.
*/
/* istanbul ignore next */
toJSON() {
return {
name: this.name,
author: this.author,
f95url: this.f95url,
url: this.url,
overview: this.overview,
engine: this.engine,
status: this.status,
previewSource: this.previewSource,
previewSrc: this.previewSrc,
version: this.version,
lastUpdate: this.lastUpdate,
lastPlayed: this.lastPlayed,
isMod: this.isMod,
changelog: this.changelog,
gameDir: this.gameDir,
downloadInfo: this.downloadInfo,
};
}
/**
* Return a new GameInfo from a JSON string
* Return a new GameInfo from a JSON string.
* @param {String} json JSON string used to create the new object
* @returns {GameInfo}
*/

View File

@ -6,7 +6,7 @@
class UserData {
constructor() {
/**
* User username.
* User name.
* @type String
*/
this.username = "";
@ -17,7 +17,7 @@ class UserData {
this.avatarSrc = null;
/**
* List of followed thread URLs.
* @type URL[]
* @type String[]
*/
this.watchedThreads = [];
}

View File

@ -3,31 +3,29 @@ module.exports = Object.freeze({
AVATAR_PIC: "a[href=\"/account/\"] > span.avatar > img[class^=\"avatar\"]",
ENGINE_ID_SELECTOR: "div[id^=\"btn-prefix_1_\"]>span",
FILTER_THREADS_BUTTON: "button[class=\"button--primary button\"]",
GAME_IMAGES: "img[src^=\"https://attachments.f95zone.to\"]",
GAME_TAGS: "a.tagItem",
GAME_TITLE: "h1.p-title-value",
GAME_TITLE_PREFIXES: "h1.p-title-value > a.labelLink > span[dir=\"auto\"]",
GT_IMAGES: "img[src^=\"https://attachments.f95zone.to\"]",
GT_TAGS: "a.tagItem",
GT_TITLE: "h1.p-title-value",
GT_TITLE_PREFIXES: "h1.p-title-value > a.labelLink > span[dir=\"auto\"]",
LOGIN_BUTTON: "button.button--icon--login",
LOGIN_MESSAGE_ERROR:
"div.blockMessage.blockMessage--error.blockMessage--iconic",
LOGIN_MESSAGE_ERROR: "div.blockMessage.blockMessage--error.blockMessage--iconic",
ONLY_GAMES_THREAD_OPTION: "select[name=\"nodes[]\"] > option[value=\"2\"]",
PASSWORD_INPUT: "input[name=\"password\"]",
SEARCH_BUTTON: "form.block > * button.button--icon--search",
SEARCH_FORM_TEXTBOX: "input[name=\"keywords\"][type=\"search\"]",
SEARCH_ONLY_GAMES_OPTION: "select[name=\"c[nodes][]\"] > option[value=\"1\"]",
STATUS_ID_SELECTOR: "div[id^=\"btn-prefix_4_\"]>span",
THREAD_POSTS:
"article.message-body:first-child > div.bbWrapper:first-of-type",
THREAD_TITLE: "h3.contentRow-title",
GS_POSTS: "article.message-body:first-child > div.bbWrapper:first-of-type",
GS_RESULT_THREAD_TITLE: "h3.contentRow-title > a",
TITLE_ONLY_CHECKBOX: "form.block > * input[name=\"c[title_only]\"]",
UNREAD_THREAD_CHECKBOX: "input[type=\"checkbox\"][name=\"unread\"]",
WT_UNREAD_THREAD_CHECKBOX: "input[type=\"checkbox\"][name=\"unread\"]",
USERNAME_ELEMENT: "a[href=\"/account/\"] > span.p-navgroup-linkText",
USERNAME_INPUT: "input[name=\"login\"]",
WATCHED_THREAD_FILTER_POPUP_BUTTON: "a.filterBar-menuTrigger",
WATCHED_THREAD_NEXT_PAGE: "a.pageNav-jump--next",
WATCHED_THREAD_URLS: "a[href^=\"/threads/\"][data-tp-primary]",
WT_FILTER_POPUP_BUTTON: "a.filterBar-menuTrigger",
WT_NEXT_PAGE: "a.pageNav-jump--next",
WT_URLS: "a[href^=\"/threads/\"][data-tp-primary]",
DOWNLOAD_LINKS_CONTAINER: "span[style=\"font-size: 18px\"]",
SEARCH_THREADS_RESULTS_BODY: "div.contentRow-main",
SEARCH_THREADS_MEMBERSHIP: "li > a:not(.username)",
THREAD_LAST_CHANGELOG: "div.bbCodeBlock-content > div:first-of-type",
GS_RESULT_BODY: "div.contentRow-main",
GS_MEMBERSHIP: "li > a:not(.username)",
GT_LAST_CHANGELOG: "div.bbCodeBlock-content > div:first-of-type",
});

View File

@ -0,0 +1,112 @@
"use strict";
// Public modules from npm
const axios = require("axios").default;
const _ = require("lodash");
const ky = require("ky-universal").create({
throwHttpErrors: false,
});
// Modules from file
const shared = require("./shared.js");
const {
F95_BASE_URL
} = require("./constants/url.js");
/**
 * @protected
 * Gets the HTML code of a page.
 *
 * The request is sent with a desktop Chrome `User-Agent` header.
 * Any error raised by the request is logged and converted into `null`.
 * @param {String} url URL to fetch
 * @returns {Promise<String>} HTML code or `null` if an error arise
 */
module.exports.fetchHTML = async function (url) {
  // The trailing space after the platform token matters: without it the two
  // literals are glued together as "...x86_64)AppleWebKit/...".
  const userAgent =
    "Mozilla/5.0 (X11; Linux x86_64) " +
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36";

  try {
    const response = await axios.get(url, {
      headers: {
        "User-Agent": userAgent
      }
    });
    return response.data;
  } catch (e) {
    shared.logger.error(`Error ${e.message} occurred while trying to fetch ${url}`);
    return null;
  }
};
/**
 * @protected
 * Rewrites the scheme of a URL so that it always uses https.
 * A leading `http://`, `https://` or protocol-relative `//` is replaced
 * with `https://`.
 * @param {String} url URL to normalize
 * @returns {String} The rewritten URL, or `null` when `url` is not a string
 */
module.exports.enforceHttpsUrl = function (url) {
  // Guard clause: only strings can be rewritten
  if (!_.isString(url)) return null;

  return url.replace(/^(https?:)?\/\//, "https://");
};
/**
 * @protected
 * Tells whether a URL belongs to the F95 platform, i.e. whether its textual
 * form starts with `F95_BASE_URL`.
 * @param {String} url URL to check
 * @returns {Boolean} true if the url belongs to the domain, false otherwise
 */
module.exports.isF95URL = function (url) {
  const urlText = url.toString();
  return urlText.startsWith(F95_BASE_URL);
};
/**
 * @protected
 * Verifies that a string can be parsed by the `URL` constructor.
 * @param {String} url String to check for correctness
 * @returns {Boolean} true if the string is a valid URL, false otherwise
 */
module.exports.isStringAValidURL = function (url) {
  let parsable = true;
  try {
    // The constructor is used only for its validation side effect
    new URL(url); // skipcq: JS-0078
  } catch (err) {
    parsable = false;
  }
  return parsable;
};
/**
 * @protected
 * Check if a particular URL is valid and reachable on the web.
 *
 * A single HEAD request is performed. The URL is considered missing when it
 * cannot be parsed or when the response status is in the 4xx range.
 * NOTE(review): errors raised by the HEAD request itself are not caught here
 * and propagate to the caller.
 * @param {String} url URL to check
 * @param {Boolean} checkRedirect If true, the function will consider redirects a violation and return false
 * @returns {Promise<Boolean>} true if the URL exists, false otherwise
 */
module.exports.urlExists = async function (url, checkRedirect) {
  if (!exports.isStringAValidURL(url)) return false;

  const response = await ky.head(url);
  if (response === undefined) return false;

  // Numeric comparison instead of running a regex against a number
  const isClientError = response.status >= 400 && response.status < 500;
  if (isClientError) return false;

  if (!checkRedirect) return true;

  // `response.url` is the value getUrlRedirect() would obtain through a second,
  // identical HEAD request, so the response already at hand is reused.
  return response.url === url;
};
/**
 * @protected
 * Performs a HEAD request and reports the URL carried by the response.
 * @param {String} url URL to check for redirect
 * @returns {Promise<String>} Redirect URL or the passed URL
 */
module.exports.getUrlRedirect = async function (url) {
  const { url: finalUrl } = await ky.head(url);
  return finalUrl;
};

248
app/scripts/scraper.js Normal file
View File

@ -0,0 +1,248 @@
"use strict";
// Public modules from npm
const cheerio = require("cheerio");
// Modules from file
const { fetchHTML, getUrlRedirect } = require("./network-helper.js");
const shared = require("./shared.js");
const GameInfo = require("./classes/game-info.js");
const f95Selector = require("./constants/css-selector.js");
/**
 * @protected
 * Get information from the game's main page.
 *
 * The page is downloaded, parsed with Cheerio and the title, tags and main
 * post are combined into a `GameInfo` object.
 * @param {String} url URL of the game/mod to extract data from
 * @return {Promise<GameInfo>} Complete information about the game you are
 * looking for, or `null` if the page could not be downloaded
 */
module.exports.getGameInfo = async function (url) {
  shared.logger.info("Obtaining game info");

  // Fetch HTML and prepare Cheerio
  const html = await fetchHTML(url);
  // fetchHTML signals a failed download with null (it logs the error itself)
  if (html === null) return null;

  const $ = cheerio.load(html);
  const body = $("body");
  const mainPost = $(f95Selector.GS_POSTS).first();

  // Extract data
  const titleData = extractInfoFromTitle(body);
  const tags = extractTags(body);
  const mainPostData = extractInfoFromMainPost(mainPost);
  // TODO: use extractStructuredData(body) once that helper is completed;
  // it is still a stub and its result was never used here.

  // Obtain the updated URL
  const redirectUrl = await getUrlRedirect(url);

  // TODO: Check to change
  const mainPostText = mainPost.text();
  const parsedInfos = parseMainPostText(mainPostText);
  // The mod flag comes from the title prefixes: `info` does not exist yet at
  // this point, so it cannot be read from there.
  const overview = getOverview(mainPostText, titleData.mod);

  // Fill in the GameInfo element with the information obtained
  const info = new GameInfo();
  info.name = titleData.name;
  info.author = titleData.author;
  info.isMod = titleData.mod;
  info.engine = titleData.engine;
  info.status = titleData.status;
  info.tags = tags;
  info.url = redirectUrl;
  info.overview = overview;
  info.lastUpdate = titleData.mod ? parsedInfos.UPDATED : parsedInfos.THREAD_UPDATED;
  // GameInfo names this field `previewSrc`
  info.previewSrc = mainPostData.previewSource;
  info.changelog = mainPostData.changelog;
  info.version = titleData.version;

  shared.logger.info(`Founded data for ${info.name}`);
  return info;
};
//#region Private methods
/**
 * @private
 * Extracts all the possible informations from the title, including the prefixes.
 *
 * The title is expected in the form `TITLE [VERSION] [AUTHOR]`: the text of
 * the first bracket group is the version and that of the second is the author.
 * Missing groups yield `null`; a title without brackets is used entirely as
 * the name.
 * NOTE(review): the prefix selector targets children of the title element, so
 * the title text may also contain the prefix labels — confirm whether the
 * name needs to be stripped of them.
 * @param {cheerio.Cheerio} body Page `body` selector
 * @returns {Object} Dictionary of values (`name`, `version`, `author`,
 * `engine`, `status`, `mod`)
 */
function extractInfoFromTitle(body) {
  const title = body
    .find(f95Selector.GT_TITLE)
    .text()
    .trim();

  // From the title we can extract: Name, author and version
  // TITLE [VERSION] [AUTHOR]
  // The global flag is needed to collect every bracket group; the surrounding
  // brackets are then stripped from each match.
  const bracketGroups = (title.match(/\[(.*?)\]/g) || []).map((group) =>
    group.slice(1, -1).trim()
  );
  const version = bracketGroups.length > 0 ? bracketGroups[0] : null;
  const author = bracketGroups.length > 1 ? bracketGroups[1] : null;

  const endIndex = title.indexOf("["); // The open bracket of the version
  const name = (endIndex === -1 ? title : title.substring(0, endIndex)).trim();

  // Parse the title prefixes
  const prefixElements = body.find(f95Selector.GT_TITLE_PREFIXES);
  let mod = false;
  let engine = null;
  let status = null;
  // Cheerio calls the callback with (index, element) where element is a raw
  // node, so the wrapped element is retrieved through eq(index).
  prefixElements.each(function parseGamePrefixes(idx) {
    const prefix = prefixElements.eq(idx).text().trim();
    if (isEngine(prefix)) engine = prefix;
    else if (isStatus(prefix)) status = prefix;
    else if (isMod(prefix)) mod = true;
  });

  return {
    name,
    version,
    author,
    engine,
    status,
    mod
  };
}
/**
 * @private
 * Gets the tags used to classify the game.
 * @param {cheerio.Cheerio} body Page `body` selector
 * @returns {String[]} List of tags (trimmed text of every tag element)
 */
function extractTags(body) {
  // Get the game tags
  const tagResults = body.find(f95Selector.GT_TAGS);

  // The map callback receives (index, element) with a raw node as element,
  // so the wrapped element is obtained through eq(index) to read its text.
  return tagResults
    .map((idx) => tagResults.eq(idx).text().trim())
    .get();
}
/**
 * @private
 * Extracts the preview image URL and the latest changelog from the main post.
 * @param {cheerio.Cheerio} mainPost Selector of the main post
 * @returns {Object} Dictionary with `previewSource` and `changelog`, each
 * `null` when the corresponding element is not found
 */
function extractInfoFromMainPost(mainPost) {
  // Get the preview image.
  // A selection object is truthy even when it matches nothing, so emptiness
  // must be checked through its length.
  const previewElement = mainPost.find(f95Selector.GT_IMAGES);
  const previewSource = previewElement.length > 0
    ? previewElement.first().attr("src") || null
    : null;

  // Get the latest changelog
  const changelogElement = mainPost.find(f95Selector.GT_LAST_CHANGELOG);
  const changelog = changelogElement.length > 0
    ? changelogElement.text().trim()
    : null;

  return {
    previewSource,
    changelog
  };
}
/**
 * @private
 * Process the main post text to get all the useful
 * information in the format *DESCRIPTOR : VALUE*.
 *
 * Each line is split at its FIRST colon only, so values that contain colons
 * themselves (URLs, times...) are preserved. Keys are uppercased and their
 * spaces replaced by underscores. Lines without a colon, with an empty key
 * or with an empty value are ignored.
 * @param {String} text Structured text of the post
 * @returns {Object} Dictionary of information
 */
function parseMainPostText(text) {
  const dataPairs = {};

  // The information searched in the game post are one per line
  for (const line of text.split("\n")) {
    const separatorIndex = line.indexOf(":");
    if (separatorIndex === -1) continue;

    // Create pair key/value
    const key = line
      .substring(0, separatorIndex)
      .trim()
      .toUpperCase() // Uppercase to avoid mismatch
      .replace(/ /g, "_");
    const value = line.substring(separatorIndex + 1).trim();

    // Add pair to the dict if valid
    if (key !== "" && value !== "") dataPairs[key] = value;
  }

  return dataPairs;
}
/**
 * @private
 * Extracts and processes the JSON-LD values found at the bottom of the page.
 *
 * NOTE(review): this function is an unfinished stub. At the moment it only
 * prints the `data` of the children of the selected elements to the console
 * and has no `return` statement, so callers receive `undefined`.
 * @param {cheerio.Cheerio} body Page `body` selector
 * @returns ???
 */
function extractStructuredData(body) {
// NOTE(review): "..." looks like a placeholder, not a real CSS selector —
// TODO replace it with the selector of the JSON-LD elements.
const structuredDataElements = body.find("...");
// NOTE(review): `for...in` enumerates every enumerable key of the object
// returned by find(), not only the matched elements — confirm this is intended.
for (const el in structuredDataElements) {
for (const child in structuredDataElements[el].children) {
const data = structuredDataElements[el].children[child].data;
// Debug output only: the value is not parsed nor stored yet
console.log(data);
// TODO: The @type should be "Book"
// TODO: Test here
}
}
}
/**
 * @private
 * Get the game description from its web page.
 * Different processing depending on whether the game is a mod or not.
 *
 * The overview is the text that precedes the "Updated" (mods) or
 * "Thread Updated" (games) marker, without the leading "Overview:" label.
 * When the marker is absent the whole text is used instead of returning an
 * empty description.
 * @param {String} text Structured text extracted from the game's web page
 * @param {Boolean} mod Specify if it is a game or a mod
 * @returns {String} Game description
 */
function getOverview(text, mod) {
  // Get overview (different parsing for game and mod)
  const endMarker = mod ? "Updated" : "Thread Updated";
  const overviewEndIndex = text.indexOf(endMarker);

  // indexOf returns -1 when the marker is missing and substring(0, -1) would
  // then yield an empty string.
  const overview = overviewEndIndex === -1
    ? text
    : text.substring(0, overviewEndIndex);

  return overview.replace("Overview:\n", "").trim();
}
/**
 * @private
 * Tells whether a title prefix names one of the known game engines.
 * The comparison against `shared.engines` ignores the letter case.
 * @param {String} prefix Prefix to check
 * @return {Boolean}
 */
function isEngine(prefix) {
  const knownEngines = toUpperCaseArray(shared.engines);
  const candidate = prefix.toUpperCase();
  return knownEngines.some((engine) => engine === candidate);
}
/**
 * @private
 * Tells whether a title prefix names one of the known game statuses.
 * The comparison against `shared.statuses` ignores the letter case.
 * @param {String} prefix Prefix to check
 * @return {Boolean}
 */
function isStatus(prefix) {
  const knownStatuses = toUpperCaseArray(shared.statuses);
  const candidate = prefix.toUpperCase();
  return knownStatuses.indexOf(candidate) !== -1;
}
/**
 * @private
 * Tells whether a title prefix marks the thread as a mod.
 * The prefixes "MOD" and "CHEAT MOD" are recognized, ignoring the letter case.
 * @param {String} prefix Prefix to check
 * @return {Boolean}
 */
function isMod(prefix) {
  switch (prefix.toUpperCase()) {
    case "MOD":
    case "CHEAT MOD":
      return true;
    default:
      return false;
  }
}
/**
 * @private
 * Builds a new array holding the uppercase version of every string
 * of the array received.
 * @param {String[]} a
 * @returns {String[]}
 */
function toUpperCaseArray(a) {
  return a.map((s) => s.toUpperCase());
}
//#endregion Private methods

View File

@ -5,20 +5,21 @@ const cheerio = require("cheerio");
// Modules from file
const { fetchHTML } = require("./network-helper.js");
const shared = require("./scripts/shared.js");
const shared = require("./shared.js");
const f95Selector = require("./constants/css-selector.js");
//#region Public methods
/**
* @protected
* Search for a game on F95Zone and return a list of URLs, one for each search result.
* @param {String} name Game name
* @returns {Promise<String[]>} URLs of results
*/
module.exports = async function searchGame(name) {
module.exports.searchGame = async function (name) {
shared.logger.info(`Searching games with name ${name}`);
// Replace the whitespaces with +
const searchName = name.replaceAll(" ", "+").toUpperCase();
const searchName = encodeURIComponent(name.toUpperCase());
// Prepare the URL (only title, search in the "Games" section, order by relevance)
const url = `https://f95zone.to/search/83456043/?q=${searchName}&t=post&c[child_nodes]=1&c[nodes][0]=2&c[title_only]=1&o=relevance`;
@ -33,10 +34,11 @@ module.exports = async function searchGame(name) {
* @param {String} name Mod name
* @returns {Promise<String[]>} URLs of results
*/
module.exports = async function searchMod(name) {
module.exports.searchMod = async function (name) {
shared.logger.info(`Searching mods with name ${name}`);
// Replace the whitespaces with +
const searchName = name.replaceAll(" ", "+").toUpperCase();
const searchName = encodeURIComponent(name.toUpperCase());
// Prepare the URL (only title, search in the "Mods" section, order by relevance)
const url = `https://f95zone.to/search/83459796/?q=${searchName}&t=post&c[child_nodes]=1&c[nodes][0]=41&c[title_only]=1&o=relevance`;
@ -44,6 +46,7 @@ module.exports = async function searchMod(name) {
// Fetch and parse the result URLs
return await fetchResultURLs(url);
};
//#endregion Public methods
//#region Private methods
/**

View File

@ -1,111 +0,0 @@
"use strict";
/**
 * Container of the information known about a game (or mod).
 */
class GameInfo {
  constructor() {
    //#region Properties
    /**
     * Game name
     * @type String
     */
    this.name = null;
    /**
     * Game author
     * @type String
     */
    this.author = null;
    /**
     * URL to the game's official conversation on the F95Zone portal
     * @type String
     */
    this.url = null;
    /**
     * Game description
     * @type String
     */
    this.overview = null;
    /**
     * List of tags associated with the game
     * @type String[]
     */
    this.tags = [];
    /**
     * Graphics engine used for game development
     * @type String
     */
    this.engine = null;
    /**
     * Progress of the game
     * @type String
     */
    this.status = null;
    /**
     * Game description image URL
     * @type String
     */
    this.previewSrc = null;
    /**
     * Game version
     * @type String
     */
    this.version = null;
    /**
     * Last time the game underwent updates
     * @type String
     */
    this.lastUpdate = null;
    /**
     * Last time the local copy of the game was run
     * @type String
     */
    this.lastPlayed = null;
    /**
     * Specifies if the game is original or a mod
     * @type Boolean
     */
    this.isMod = false;
    /**
     * Changelog for the last version.
     * @type String
     */
    this.changelog = null;
    /**
     * Directory containing the local copy of the game
     * @type String
     */
    this.gameDir = null;
    //#endregion Properties
  }

  /**
   * Converts the object to a dictionary used for JSON serialization.
   * Every property set by the constructor is included, `tags` too, so that
   * a serialization round trip does not lose data.
   * @returns {Object} Plain dictionary with the properties of the object
   */
  /* istanbul ignore next */
  toJSON() {
    return {
      name: this.name,
      author: this.author,
      url: this.url,
      overview: this.overview,
      tags: this.tags,
      engine: this.engine,
      status: this.status,
      previewSrc: this.previewSrc,
      version: this.version,
      lastUpdate: this.lastUpdate,
      lastPlayed: this.lastPlayed,
      isMod: this.isMod,
      changelog: this.changelog,
      gameDir: this.gameDir,
    };
  }

  /**
   * Return a new GameInfo from a JSON string.
   * An already parsed dictionary is accepted as well.
   * @param {String|Object} json JSON string (or parsed dictionary) used to create the new object
   * @returns {GameInfo}
   */
  /* istanbul ignore next */
  static fromJSON(json) {
    // Object.assign on a string would copy its characters as indexed
    // properties, hence the string must be parsed first.
    const data = typeof json === "string" ? JSON.parse(json) : json;
    return Object.assign(new GameInfo(), data);
  }
}
module.exports = GameInfo;

View File

@ -1,35 +0,0 @@
"use strict";
// Public modules from npm
const axios = require("axios").default;
const _ = require("lodash");
// Modules from file
const shared = require("./scripts/shared.js");
/**
 * @protected
 * Gets the HTML code of a page.
 * Any error raised by the request is logged and converted into `null`.
 * @param {String} url URL to fetch
 * @returns {Promise<String>} HTML code or `null` if an error arise
 */
// Exported as a named property: assigning `module.exports` directly twice
// would let the second assignment discard this function.
module.exports.fetchHTML = async function fetchHTML(url) {
  try {
    const response = await axios.get(url);
    return response.data;
  } catch {
    shared.logger.error(`An error occurred while trying to fetch the URL: ${url}`);
    return null;
  }
};

/**
 * @protected
 * Enforces the scheme of the URL is https and returns the new URL.
 * @param {String} url
 * @returns {String} The rewritten URL, or `null` when `url` is not a string
 */
module.exports.enforceHttpsUrl = function enforceHttpsUrl(url) {
  const value = _.isString(url) ? url.replace(/^(https?:)?\/\//, "https://") : null;
  return value;
};

609
legacy/index.js Normal file
View File

@ -0,0 +1,609 @@
"use strict";
// Core modules
const fs = require("fs");
// Modules from file
const shared = require("../app/scripts/shared.js");
const urlK = require("../app/scripts/constants/url.js");
const selectorK = require("../app/scripts/constants/css-selector.js");
const urlHelper = require("../app/scripts/url-helper.js");
const scraper = require("../app/scripts/game-scraper.js");
const {
prepareBrowser,
preparePage,
} = require("../app/scripts/puppeteer-helper.js");
const searcher = require("../app/scripts/game-searcher.js");
// Classes from file
const GameInfo = require("../app/scripts/classes/game-info.js");
const LoginResult = require("../app/scripts/classes/login-result.js");
const UserData = require("../app/scripts/classes/user-data.js");
//#region Export classes
module.exports.GameInfo = GameInfo;
module.exports.LoginResult = LoginResult;
module.exports.UserData = UserData;
//#endregion Export classes
//#region Export properties
/**
 * Enables or disables the debug mode of the module.
 * The flag is stored in the shared state and the logger level is set to
 * "debug" when enabled, "warn" otherwise.
 * @param {Boolean} value true to enable the debug mode
 */
module.exports.debug = function (value) {
  shared.debug = value;

  // Configure logger
  if (value) shared.logger.level = "debug";
  else shared.logger.level = "warn";
};
/**
 * @public
 * Reports the login flag kept in the shared state, i.e. whether a user is
 * logged in to the F95Zone platform or not.
 * @returns {Boolean}
 */
module.exports.isLogged = () => shared.isLogged;
/**
 * @public
 * Sets the isolation mode. If true, a new browser is opened for each request
 * to the F95Zone platform, otherwise the same browser is reused.
 * @param {Boolean} value New isolation mode
 */
module.exports.setIsolation = (value) => {
  shared.isolation = value;
};
/**
 * @public
 * Gets the path to the cache directory stored in the shared state.
 * @returns {String}
 */
module.exports.getCacheDir = () => shared.cacheDir;
/**
 * @public
 * Set path to the cache directory.
 * The directory is created when it does not exist yet, together with any
 * missing parent directory.
 * @param {String} value Path to the cache directory
 */
module.exports.setCacheDir = function (value) {
  shared.cacheDir = value;

  // Create directory if it doesn't exist.
  // `recursive` lets nested paths be created in one call instead of failing
  // when a parent directory is missing.
  if (!fs.existsSync(shared.cacheDir)) {
    fs.mkdirSync(shared.cacheDir, { recursive: true });
  }
};
/**
 * @public
 * Set local chromium path, storing it in the shared state.
 * @param {String} value Path to the local chromium executable
 */
module.exports.setChromiumPath = (value) => {
  shared.chromiumLocalPath = value;
};
//#endregion Export properties
//#region Global variables
var _browser = null;
const USER_NOT_LOGGED = "User not authenticated, unable to continue";
//#endregion
//#region Export methods
/**
 * @public
 * Log in to the F95Zone platform.
 * This **must** be the first operation performed before accessing any other script functions.
 *
 * Nothing is done when the user is already logged in. Otherwise the cookies
 * saved on disk are tried first and, only when they are missing or expired,
 * the login is performed through the browser.
 * @param {String} username Username used for login
 * @param {String} password Password used for login
 * @returns {Promise<LoginResult>} Result of the operation
 */
module.exports.login = async function (username, password) {
  if (shared.isLogged) {
    shared.logger.info("Already logged in");
    const result = new LoginResult(true, "Already logged in");
    return result;
  }

  // If cookies are loaded, use them to authenticate
  shared.cookies = loadCookies();
  if (shared.cookies !== null) {
    shared.logger.info("Valid session, no need to re-authenticate");
    shared.isLogged = true;
    const result = new LoginResult(true, "Logged with cookies");
    return result;
  }

  // Else, log in through browser
  shared.logger.info(
    "No saved sessions or expired session, login on the platform"
  );

  if (_browser === null && !shared.isolation) _browser = await prepareBrowser();
  const browser = shared.isolation ? await prepareBrowser() : _browser;

  try {
    const result = await loginF95(browser, username, password);
    shared.isLogged = result.success;

    if (result.success) {
      // Reload cookies
      shared.cookies = loadCookies();
      shared.logger.info("User logged in through the platform");
    } else {
      shared.logger.warn("Error during authentication: " + result.message);
    }
    return result;
  } finally {
    // The isolated browser is closed even when the login throws
    if (shared.isolation) await browser.close();
  }
};
/**
 * @public
 * This method loads the main data from the F95 portal
 * used to provide game information. You **must** be logged
 * in to the portal before calling this method.
 *
 * The engines and the statuses are stored in the shared state.
 * @returns {Promise<Boolean>} Result of the operation (false when the user
 * is not logged in)
 */
module.exports.loadF95BaseData = async function () {
  if (!shared.isLogged || !shared.cookies) {
    shared.logger.warn(USER_NOT_LOGGED);
    return false;
  }

  shared.logger.info("Loading base data...");

  // Prepare a new web page
  if (_browser === null && !shared.isolation) _browser = await prepareBrowser();
  const browser = shared.isolation ? await prepareBrowser() : _browser;

  let page = null;
  try {
    page = await preparePage(browser); // Set new isolated page
    await page.setCookie(...shared.cookies); // Set cookies to avoid login

    // Go to latest update page and wait for it to load
    await page.goto(urlK.F95_LATEST_UPDATES, {
      waitUntil: shared.WAIT_STATEMENT,
    });

    // Obtain engines (disk/online)
    await page.waitForSelector(selectorK.ENGINE_ID_SELECTOR);
    shared.engines = await loadValuesFromLatestPage(
      page,
      shared.enginesCachePath,
      selectorK.ENGINE_ID_SELECTOR,
      "engines"
    );

    // Obtain statuses (disk/online)
    await page.waitForSelector(selectorK.STATUS_ID_SELECTOR);
    shared.statuses = await loadValuesFromLatestPage(
      page,
      shared.statusesCachePath,
      selectorK.STATUS_ID_SELECTOR,
      "statuses"
    );
  } finally {
    // Release the page (and the isolated browser) also when a step fails
    if (page !== null) await page.close();
    if (shared.isolation) await browser.close();
  }

  shared.logger.info("Base data loaded");
  return true;
};
/**
 * @public
 * Check if a new version of the game exists.
 * You **must** be logged in to the portal before calling this method.
 *
 * An update is reported when the stored URL is no longer reachable without
 * redirects or when the version parsed online differs (ignoring the letter
 * case) from the stored one.
 * @param {GameInfo} info Information about the game to get the version for
 * @returns {Promise<Boolean>} true if an update is available, false otherwise
 * (also when the user is not logged in)
 */
module.exports.chekIfGameHasUpdate = async function (info) {
  if (!shared.isLogged || !shared.cookies) {
    shared.logger.warn(USER_NOT_LOGGED);
    return false;
  }

  // GameInfo stores the thread URL in `url`; `f95url` is the old property
  // name and is kept as a fallback for objects created before the rename.
  const gameUrl = info.url || info.f95url;

  // F95 change URL at every game update,
  // so if the URL is different an update is available
  const exists = await urlHelper.urlExists(gameUrl, true);
  if (!exists) return true;

  // Parse version from title
  if (_browser === null && !shared.isolation) _browser = await prepareBrowser();
  const browser = shared.isolation ? await prepareBrowser() : _browser;

  let onlineVersion;
  try {
    onlineVersion = await scraper.getGameVersionFromTitle(browser, info);
  } finally {
    // The isolated browser is closed even when the scraping throws
    if (shared.isolation) await browser.close();
  }

  return onlineVersion.toUpperCase() !== info.version.toUpperCase();
};
/**
 * @public
 * Starting from the name, it gets all the information about the game you are looking for.
 * You **must** be logged in to the portal before calling this method.
 *
 * Every search result is scraped in parallel; empty results are discarded
 * and mods are kept only when `includeMods` is true.
 * @param {String} name Name of the game searched
 * @param {Boolean} includeMods Indicates whether to also take mods into account when searching
 * @returns {Promise<GameInfo[]>} List of information obtained where each item corresponds to
 * an identified game (in the case of homonymy of titles), or `null` when the
 * user is not logged in
 */
module.exports.getGameData = async function (name, includeMods) {
  if (!shared.isLogged || !shared.cookies) {
    shared.logger.warn(USER_NOT_LOGGED);
    return null;
  }

  // Gets the search results of the game being searched for
  if (_browser === null && !shared.isolation) _browser = await prepareBrowser();
  const browser = shared.isolation ? await prepareBrowser() : _browser;

  try {
    const urlList = await searcher.getSearchGameResults(browser, name);

    // Start looking for information on every result at the same time
    const infos = await Promise.all(
      Array.from(urlList, (url) => scraper.getGameInfo(browser, url))
    );

    // Ignore empty results and skip mods if not required
    return infos.filter((info) => info && (includeMods || !info.isMod));
  } finally {
    // The isolated browser is closed even when the search or a scrape throws
    if (shared.isolation) await browser.close();
  }
};
/**
 * @public
 * Starting from the url, it gets all the information about the game you are looking for.
 * You **must** be logged in to the portal before calling this method.
 * @param {String} url URL of the game to obtain information of
 * @returns {Promise<GameInfo>} Information about the game. If no game was found, null is returned
 * (null is returned also when the user is not logged in)
 * @throws {URIError} If the URL is not valid or not reachable
 * @throws {Error} If the URL does not belong to the F95Zone platform
 */
module.exports.getGameDataFromURL = async function (url) {
  if (!shared.isLogged || !shared.cookies) {
    shared.logger.warn(USER_NOT_LOGGED);
    return null;
  }

  // Check URL
  const exists = await urlHelper.urlExists(url);
  if (!exists) throw new URIError(url + " is not a valid URL");
  if (!urlHelper.isF95URL(url)) {
    throw new Error(url + " is not a valid F95Zone URL");
  }

  // Prepare the browser used to scrape the page
  if (_browser === null && !shared.isolation) _browser = await prepareBrowser();
  const browser = shared.isolation ? await prepareBrowser() : _browser;

  try {
    // Get game data
    return await scraper.getGameInfo(browser, url);
  } finally {
    // The isolated browser is closed even when the scraping throws
    if (shared.isolation) await browser.close();
  }
};
/**
 * @public
 * Gets the data of the currently logged in user.
 * You **must** be logged in to the portal before calling this method.
 *
 * The user name and the avatar are read from the base page of the platform
 * while the watched threads are collected in parallel.
 * @returns {Promise<UserData>} Data of the user currently logged in, or
 * `null` when the user is not logged in
 */
module.exports.getUserData = async function () {
  if (!shared.isLogged || !shared.cookies) {
    shared.logger.warn(USER_NOT_LOGGED);
    return null;
  }

  // Prepare a new web page
  if (_browser === null && !shared.isolation) _browser = await prepareBrowser();
  const browser = shared.isolation ? await prepareBrowser() : _browser;

  let page = null;
  try {
    page = await preparePage(browser); // Set new isolated page
    await page.setCookie(...shared.cookies); // Set cookies to avoid login
    await page.goto(urlK.F95_BASE_URL); // Go to base page

    // Explicitly wait for the required items to load
    await Promise.all([
      page.waitForSelector(selectorK.USERNAME_ELEMENT),
      page.waitForSelector(selectorK.AVATAR_PIC),
    ]);

    // The three lookups run together and are awaited as a group, so that a
    // failure of one of them never leaves a pending promise unobserved.
    const [threads, username, avatarSrc] = await Promise.all([
      getUserWatchedGameThreads(browser),
      page.evaluate(
        /* istanbul ignore next */ (selector) =>
          document.querySelector(selector).innerText,
        selectorK.USERNAME_ELEMENT
      ),
      page.evaluate(
        /* istanbul ignore next */ (selector) =>
          document.querySelector(selector).getAttribute("src"),
        selectorK.AVATAR_PIC
      ),
    ]);

    const ud = new UserData();
    ud.username = username;
    ud.avatarSrc = urlHelper.isStringAValidURL(avatarSrc) ? avatarSrc : null;
    ud.watchedThreads = threads;
    return ud;
  } finally {
    // Release the page (and the isolated browser) also when a step fails
    if (page !== null) await page.close();
    if (shared.isolation) await browser.close();
  }
};
/**
 * @public
 * Marks the current user as logged out and, when the browser is shared
 * between requests, closes it gracefully.
 * You **must** be logged in to the portal before calling this method.
 */
module.exports.logout = async function () {
  const authenticated = shared.isLogged && shared.cookies;
  if (!authenticated) {
    shared.logger.warn(USER_NOT_LOGGED);
    return;
  }

  // Logout
  shared.isLogged = false;

  // Gracefully close shared browser
  const hasSharedBrowser = !shared.isolation && _browser !== null;
  if (hasSharedBrowser) {
    await _browser.close();
    _browser = null;
  }
};
//#endregion
//#region Private methods
//#region Cookies functions
/**
 * @private
 * Reads the cookies previously stored on disk (at `shared.cookiesCachePath`)
 * and checks their expiration.
 * @return {object[]} List of dictionaries, or null if the cookie file does
 * not exist or at least one cookie has expired
 */
function loadCookies() {
  // Without a cookie file there is nothing to load
  if (!fs.existsSync(shared.cookiesCachePath)) return null;

  // Read cookies
  const cookies = JSON.parse(fs.readFileSync(shared.cookiesCachePath));

  // A single expired cookie invalidates the whole set
  for (const cookie of cookies) {
    if (isCookieExpired(cookie)) return null;
  }

  // Cookies loaded and verified
  return cookies;
}
/**
 * @private
 * Check the validity of a cookie.
 *
 * The expiration is read from the `expires` property (the name used by the
 * cookies handled through Puppeteer), falling back on `expire`. It is
 * interpreted as a UNIX timestamp in seconds. Cookies without expiration, or
 * with a negative one (e.g. `-1`), are considered never expiring.
 * @param {object} cookie Cookies to verify the validity. It's a dictionary
 * @returns {Boolean} true if the cookie has expired, false otherwise
 */
function isCookieExpired(cookie) {
  const rawExpiration = cookie.expires !== undefined ? cookie.expires : cookie.expire;

  // Ignore cookies that never expire (no value, not a number or negative)
  if (rawExpiration === undefined || rawExpiration === null || rawExpiration === "") {
    return false;
  }
  const expirationUnixTimestamp = Number(rawExpiration);
  if (!Number.isFinite(expirationUnixTimestamp) || expirationUnixTimestamp < 0) {
    return false;
  }

  // Convert UNIX epoch timestamp (seconds) to milliseconds before comparing
  const expirationMs = expirationUnixTimestamp * 1000;
  if (expirationMs >= Date.now()) return false;

  shared.logger.warn(
    "Cookie " + cookie.name + " expired, you need to re-authenticate"
  );
  return true;
}
//#endregion Cookies functions
//#region Latest Updates page parserer
/**
 * @private
 * Returns the searched values (engines or states). When the JSON file at
 * `path` exists its content is returned, otherwise the values are obtained
 * from the given page through `getValuesFromLatestPage` and then saved
 * to `path` for later use.
 * @param {puppeteer.Page} page Page used to locate the required elements
 * @param {String} path Path to disk of the JSON file containing the data to read / write
 * @param {String} selector CSS selector of the required elements
 * @param {String} elementRequested Required element (engines or states) used to detail log messages
 * @returns {Promise<String[]>} List of required values
 */
async function loadValuesFromLatestPage(
  page,
  path,
  selector,
  elementRequested
) {
  shared.logger.info(`Load ${elementRequested} from disk...`);

  const isCached = fs.existsSync(path);
  if (!isCached) {
    // Nothing on disk: fetch the values from the page and cache them
    shared.logger.info(`No ${elementRequested} cached, downloading...`);
    const downloaded = await getValuesFromLatestPage(
      page,
      selector,
      `Getting ${elementRequested} from page`
    );
    fs.writeFileSync(path, JSON.stringify(downloaded));
    return downloaded;
  }

  // Cached values are returned as stored, without touching the page
  const rawCache = fs.readFileSync(path);
  return JSON.parse(rawCache);
}
/**
 * @private
 * Collects the text of every element of the page matched by the
 * selector passed by parameter, converted to uppercase.
 * @param {puppeteer.Page} page Page used to locate items specified by the selector
 * @param {String} selector CSS selector
 * @param {String} logMessage Message written to the log before the page is queried
 * @return {Promise<String[]>} Uppercase text of the matched elements, in the order returned by the page
 */
async function getValuesFromLatestPage(page, selector, logMessage) {
  shared.logger.info(logMessage);

  const handles = await page.$$(selector);
  const upperTexts = [];
  // Elements are evaluated one at a time, in page order
  for (let index = 0; index < handles.length; index += 1) {
    const rawText = await handles[index].evaluate(
      /* istanbul ignore next */ (node) => node.innerText
    );
    // Uppercase gives a better match if the value is used in a query
    upperTexts.push(rawText.toUpperCase());
  }
  return upperTexts;
}
//#endregion
//#region User
/**
 * @private
 * Log in to the F95Zone portal and, if successful, save the cookies
 * to the cookie cache file.
 * The page opened for the login is always closed, even when one of the
 * navigation steps throws (the error is then propagated to the caller).
 * @param {puppeteer.Browser} browser Browser object used for navigation
 * @param {String} username Username to use during login
 * @param {String} password Password to use during login
 * @returns {Promise<LoginResult>} Result of the operation
 */
async function loginF95(browser, username, password) {
  const page = await preparePage(browser); // Set new isolated page
  try {
    await page.goto(urlK.F95_LOGIN_URL); // Go to login page

    // Explicitly wait for the required items to load
    await Promise.all([
      page.waitForSelector(selectorK.USERNAME_INPUT),
      page.waitForSelector(selectorK.PASSWORD_INPUT),
      page.waitForSelector(selectorK.LOGIN_BUTTON),
    ]);

    await page.type(selectorK.USERNAME_INPUT, username); // Insert username
    await page.type(selectorK.PASSWORD_INPUT, password); // Insert password
    await Promise.all([
      page.click(selectorK.LOGIN_BUTTON), // Click on the login button
      page.waitForNavigation({
        waitUntil: shared.WAIT_STATEMENT,
      }), // Wait for page to load
    ]);

    // Tells whether the page contains an element matching the selector
    const elementExists = (cssSelector) =>
      page.evaluate(
        /* istanbul ignore next */
        (selector) => document.querySelector(selector) !== null,
        cssSelector
      );

    // The avatar is used as proof that the user is logged in
    const success = await elementExists(selectorK.AVATAR_INFO);
    if (success) {
      // Save cookies to avoid re-auth
      const c = await page.cookies();
      fs.writeFileSync(shared.cookiesCachePath, JSON.stringify(c));
      return new LoginResult(true, "Authentication successful");
    }

    // Login failed: without an error banner the reason is unknown
    const errorMessageExists = await elementExists(
      selectorK.LOGIN_MESSAGE_ERROR
    );
    if (!errorMessageExists) return new LoginResult(false, "Unknown error");

    const errorMessage = await page.evaluate(
      /* istanbul ignore next */ (selector) =>
        document.querySelector(selector).innerText,
      selectorK.LOGIN_MESSAGE_ERROR
    );

    // Shorten the known portal messages, report any other message as it is
    let message = errorMessage;
    if (errorMessage === "Incorrect password. Please try again.") {
      message = "Incorrect password";
    } else if (
      errorMessage ===
      "The requested user '" + username + "' could not be found."
    ) {
      // The single quotes around the username are part of the compared message
      message = "Incorrect username";
    }
    return new LoginResult(false, message);
  } finally {
    await page.close(); // Close the page, whatever happened
  }
}
/**
 * @private
 * Gets the list of URLs of threads the user follows.
 * The watched threads page is filtered to show only game threads
 * (read ones included) and every result page is visited in turn.
 * The page opened for the operation is always closed, even when one of
 * the steps throws (the error is then propagated to the caller).
 * @param {puppeteer.Browser} browser Browser object used for navigation
 * @returns {Promise<String[]>} URL list
 */
async function getUserWatchedGameThreads(browser) {
  const page = await preparePage(browser); // Set new isolated page
  try {
    await page.goto(urlK.F95_WATCHED_THREADS); // Go to the thread page

    // Explicitly wait for the required items to load
    await page.waitForSelector(selectorK.WATCHED_THREAD_FILTER_POPUP_BUTTON);

    // Show the popup
    await Promise.all([
      page.click(selectorK.WATCHED_THREAD_FILTER_POPUP_BUTTON),
      page.waitForSelector(selectorK.UNREAD_THREAD_CHECKBOX),
      page.waitForSelector(selectorK.ONLY_GAMES_THREAD_OPTION),
      page.waitForSelector(selectorK.FILTER_THREADS_BUTTON),
    ]);

    // Set the filters
    await page.evaluate(
      /* istanbul ignore next */ (selector) =>
        document.querySelector(selector).removeAttribute("checked"),
      selectorK.UNREAD_THREAD_CHECKBOX
    ); // Also read the threads already read

    // Filter the threads
    await page.click(selectorK.ONLY_GAMES_THREAD_OPTION);
    await page.click(selectorK.FILTER_THREADS_BUTTON);
    await page.waitForSelector(selectorK.WATCHED_THREAD_URLS);

    // Get the threads urls
    const urls = [];
    let nextPageExists = false;
    do {
      // Get all the URLs of the current result page
      for (const handle of await page.$$(selectorK.WATCHED_THREAD_URLS)) {
        const src = await page.evaluate(
          /* istanbul ignore next */ (element) => element.href,
          handle
        );
        // If 'unread' is left, it will redirect to the last unread post
        urls.push(src.replace("/unread", ""));
      }

      // Return a boolean: a DOM node is not a serializable result for evaluate
      nextPageExists = await page.evaluate(
        /* istanbul ignore next */ (selector) =>
          document.querySelector(selector) !== null,
        selectorK.WATCHED_THREAD_NEXT_PAGE
      );

      // Click to next page
      if (nextPageExists) {
        await page.click(selectorK.WATCHED_THREAD_NEXT_PAGE);
        await page.waitForSelector(selectorK.WATCHED_THREAD_URLS);
      }
    } while (nextPageExists);

    return urls;
  } finally {
    await page.close(); // Close the page, whatever happened
  }
}
//#endregion User
//#endregion Private methods

View File

@ -0,0 +1,119 @@
"use strict";
/**
 * Container for the information available about a game.
 */
class GameInfo {
  constructor() {
    //#region Properties
    /**
     * Game name
     * @type String
     */
    this.name = null;
    /**
     * Game author
     * @type String
     */
    this.author = null;
    /**
     * URL to the game's official conversation on the F95Zone portal
     * @type String
     */
    this.f95url = null;
    /**
     * Game description
     * @type String
     */
    this.overview = null;
    /**
     * List of tags associated with the game
     * @type String[]
     */
    this.tags = [];
    /**
     * Graphics engine used for game development
     * @type String
     */
    this.engine = null;
    /**
     * Progress of the game
     * @type String
     */
    this.status = null;
    /**
     * Game description image URL
     * @type String
     */
    this.previewSource = null;
    /**
     * Game version
     * @type String
     */
    this.version = null;
    /**
     * Last time the game underwent updates
     * @type String
     */
    this.lastUpdate = null;
    /**
     * Last time the local copy of the game was run
     * @type String
     */
    this.lastPlayed = null;
    /**
     * Specifies if the game is original or a mod
     * @type Boolean
     */
    this.isMod = false;
    /**
     * Changelog for the last version.
     * @type String
     */
    this.changelog = null;
    /**
     * Directory containing the local copy of the game
     * @type String
     */
    this.gameDir = null;
    /**
     * Information on game file download links,
     * including information on hosting platforms
     * and operating system supported by the specific link
     * @type GameDownload[]
     */
    this.downloadInfo = [];
    //#endregion Properties
  }

  /**
   * Converts the object to a dictionary used for JSON serialization.
   * Every property set by the constructor is included.
   * @returns {Object} Dictionary with the properties of the game
   */
  /* istanbul ignore next */
  toJSON() {
    return {
      name: this.name,
      author: this.author,
      f95url: this.f95url,
      overview: this.overview,
      // tags must be serialized too, otherwise they are lost on a save/load cycle
      tags: this.tags,
      engine: this.engine,
      status: this.status,
      previewSource: this.previewSource,
      version: this.version,
      lastUpdate: this.lastUpdate,
      lastPlayed: this.lastPlayed,
      isMod: this.isMod,
      changelog: this.changelog,
      gameDir: this.gameDir,
      downloadInfo: this.downloadInfo,
    };
  }

  /**
   * Return a new GameInfo from a JSON string or from an already parsed dictionary
   * @param {String|Object} json JSON string (or parsed dictionary) used to create the new object
   * @returns {GameInfo}
   * @throws {SyntaxError} If `json` is a string that is not valid JSON
   */
  /* istanbul ignore next */
  static fromJSON(json) {
    // A string must be parsed first: Object.assign would copy its characters as indexed keys
    const data = typeof json === "string" ? JSON.parse(json) : json;
    return Object.assign(new GameInfo(), data);
  }
}
module.exports = GameInfo;

View File

@ -3,29 +3,31 @@ module.exports = Object.freeze({
AVATAR_PIC: "a[href=\"/account/\"] > span.avatar > img[class^=\"avatar\"]",
ENGINE_ID_SELECTOR: "div[id^=\"btn-prefix_1_\"]>span",
FILTER_THREADS_BUTTON: "button[class=\"button--primary button\"]",
GT_IMAGES: "img[src^=\"https://attachments.f95zone.to\"]",
GT_TAGS: "a.tagItem",
GT_TITLE: "h1.p-title-value",
GT_TITLE_PREFIXES: "h1.p-title-value > a.labelLink > span[dir=\"auto\"]",
GAME_IMAGES: "img[src^=\"https://attachments.f95zone.to\"]",
GAME_TAGS: "a.tagItem",
GAME_TITLE: "h1.p-title-value",
GAME_TITLE_PREFIXES: "h1.p-title-value > a.labelLink > span[dir=\"auto\"]",
LOGIN_BUTTON: "button.button--icon--login",
LOGIN_MESSAGE_ERROR: "div.blockMessage.blockMessage--error.blockMessage--iconic",
LOGIN_MESSAGE_ERROR:
"div.blockMessage.blockMessage--error.blockMessage--iconic",
ONLY_GAMES_THREAD_OPTION: "select[name=\"nodes[]\"] > option[value=\"2\"]",
PASSWORD_INPUT: "input[name=\"password\"]",
SEARCH_BUTTON: "form.block > * button.button--icon--search",
SEARCH_FORM_TEXTBOX: "input[name=\"keywords\"][type=\"search\"]",
SEARCH_ONLY_GAMES_OPTION: "select[name=\"c[nodes][]\"] > option[value=\"1\"]",
STATUS_ID_SELECTOR: "div[id^=\"btn-prefix_4_\"]>span",
THREAD_POSTS: "article.message-body:first-child > div.bbWrapper:first-of-type",
GS_RESULT_THREAD_TITLE: "h3.contentRow-title > a",
THREAD_POSTS:
"article.message-body:first-child > div.bbWrapper:first-of-type",
THREAD_TITLE: "h3.contentRow-title",
TITLE_ONLY_CHECKBOX: "form.block > * input[name=\"c[title_only]\"]",
WT_UNREAD_THREAD_CHECKBOX: "input[type=\"checkbox\"][name=\"unread\"]",
UNREAD_THREAD_CHECKBOX: "input[type=\"checkbox\"][name=\"unread\"]",
USERNAME_ELEMENT: "a[href=\"/account/\"] > span.p-navgroup-linkText",
USERNAME_INPUT: "input[name=\"login\"]",
WT_FILTER_POPUP_BUTTON: "a.filterBar-menuTrigger",
WT_NEXT_PAGE: "a.pageNav-jump--next",
WT_URLS: "a[href^=\"/threads/\"][data-tp-primary]",
WATCHED_THREAD_FILTER_POPUP_BUTTON: "a.filterBar-menuTrigger",
WATCHED_THREAD_NEXT_PAGE: "a.pageNav-jump--next",
WATCHED_THREAD_URLS: "a[href^=\"/threads/\"][data-tp-primary]",
DOWNLOAD_LINKS_CONTAINER: "span[style=\"font-size: 18px\"]",
GS_RESULT_BODY: "div.contentRow-main",
GS_MEMBERSHIP: "li > a:not(.username)",
SEARCH_THREADS_RESULTS_BODY: "div.contentRow-main",
SEARCH_THREADS_MEMBERSHIP: "li > a:not(.username)",
THREAD_LAST_CHANGELOG: "div.bbCodeBlock-content > div:first-of-type",
});

173
legacy/scripts/shared.js Normal file
View File

@ -0,0 +1,173 @@
"use strict";
// Core modules
const { join } = require("path");
const log4js = require("log4js");
/**
 * Static holder of the state shared between the modules:
 * session data, cached portal values, cache paths and the logger.
 */
class Shared {
  //#region Backing fields
  static #debugEnabled = false;
  static #loggedIn = false;
  static #cookieJar = null;
  static #engineList = null;
  static #statusList = null;
  static #cacheRoot = "./f95cache";
  static #isolated = false;
  static #log = log4js.getLogger();
  //#endregion Backing fields

  /**
   * Wait instruction for the browser created by puppeteer.
   * @type String
   */
  static WAIT_STATEMENT = "domcontentloaded";

  //#region Read/write properties
  /**
   * Shows log messages and other useful functions for module debugging.
   * @returns {Boolean}
   */
  static get debug() {
    return this.#debugEnabled;
  }
  static set debug(val) {
    this.#debugEnabled = val;
  }

  /**
   * Indicates whether a user is logged in to the F95Zone platform or not.
   * @returns {Boolean}
   */
  static get isLogged() {
    return this.#loggedIn;
  }
  static set isLogged(val) {
    this.#loggedIn = val;
  }

  /**
   * List of cookies obtained from the F95Zone platform (null until set).
   * @returns {Object[]}
   */
  static get cookies() {
    return this.#cookieJar;
  }
  static set cookies(val) {
    this.#cookieJar = val;
  }

  /**
   * List of possible game engines used for development (null until set).
   * @returns {String[]}
   */
  static get engines() {
    return this.#engineList;
  }
  static set engines(val) {
    this.#engineList = val;
  }

  /**
   * List of possible development states that a game can assume (null until set).
   * @returns {String[]}
   */
  static get statuses() {
    return this.#statusList;
  }
  static set statuses(val) {
    this.#statusList = val;
  }

  /**
   * Directory to save the API cache. Defaults to "./f95cache".
   * @returns {String}
   */
  static get cacheDir() {
    return this.#cacheRoot;
  }
  static set cacheDir(val) {
    this.#cacheRoot = val;
  }

  /**
   * If true, it opens a new browser for each request
   * to the F95Zone platform, otherwise it reuses the same.
   * @returns {Boolean}
   */
  static get isolation() {
    return this.#isolated;
  }
  static set isolation(val) {
    this.#isolated = val;
  }
  //#endregion Read/write properties

  //#region Read-only properties
  /**
   * Path to the cookies cache file ("cookies.json" inside the cache directory).
   * @returns {String}
   */
  static get cookiesCachePath() {
    return join(this.#cacheRoot, "cookies.json");
  }

  /**
   * Path to the game engine cache ("engines.json" inside the cache directory).
   * @returns {String}
   */
  static get enginesCachePath() {
    return join(this.#cacheRoot, "engines.json");
  }

  /**
   * Path to the cache of possible game states ("statuses.json" inside the cache directory).
   * @returns {String}
   */
  static get statusesCachePath() {
    return join(this.#cacheRoot, "statuses.json");
  }

  /**
   * Logger object obtained from log4js when the class is loaded.
   * @returns {log4js.Logger}
   */
  static get logger() {
    return this.#log;
  }
  //#endregion Read-only properties
}
module.exports = Shared;

View File

@ -35,9 +35,7 @@
"cheerio": "^1.0.0-rc.3",
"ky": "^0.24.0",
"ky-universal": "^0.8.2",
"log4js": "^6.3.0",
"node-html-parser": "^1.2.21",
"puppeteer": "^5.3.1"
"log4js": "^6.3.0"
},
"devDependencies": {
"babel-eslint": "^10.1.0",

View File

@ -1,15 +1,21 @@
const F95API = require("../app/index.js");
"use strict";
F95API.debug(true);
main();
// Modules from file
const searcher = require("../plain-html/scripts/searcher.js");
const scraper = require("../plain-html/scripts/scraper.js");
async function main() {
const loginResult = await F95API.login("MillenniumEarl", "f9vTcRNuvxj4YpK");
// Search for Kingdom Of Deception data
searchKOD();
if (loginResult.success) {
await F95API.loadF95BaseData();
const gameData = await F95API.getGameData("a struggle with sin", false);
console.log(gameData);
async function searchKOD() {
console.log("Searching KOD...");
const urls = await searcher.searchGame("kingdom of deception");
console.log(`Found: ${urls}`);
console.log("Scraping data...");
for (const url of urls) {
const gamedata = await scraper.getGameInfo(url);
console.log(gamedata);
}
F95API.logout();
console.log("Scraping completed!");
}