Merge branch 'master' of https://github.com/MillenniumEarl/F95API
commit
8a2fd49308
|
@ -1,4 +1,7 @@
|
||||||
# F95API
|
# F95API
|
||||||
Unofficial Node JS module for scraping F95Zone platform
|
|
||||||
|
Unofficial Node JS module for scraping F95Zone platform
|
||||||
|
|
||||||
[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FMillenniumEarl%2FF95API.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2FMillenniumEarl%2FF95API?ref=badge_shield)
|
[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FMillenniumEarl%2FF95API.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2FMillenniumEarl%2FF95API?ref=badge_shield)
|
||||||
|
[![DeepSource](https://deepsource.io/gh/MillenniumEarl/F95API.svg/?label=active+issues&show_trend=true)](https://deepsource.io/gh/MillenniumEarl/F95API/?ref=repository-badge)
|
||||||
|
[![codecov](https://codecov.io/gh/MillenniumEarl/F95API/branch/master/graph/badge.svg?token=KHN1TNIH7D)](https://codecov.io/gh/MillenniumEarl/F95API)
|
||||||
|
|
|
@ -1,17 +1,17 @@
|
||||||
'use strict';
|
"use strict";
|
||||||
|
|
||||||
// Public modules from npm
|
// Public modules from npm
|
||||||
const HTMLParser = require('node-html-parser');
|
const HTMLParser = require("node-html-parser");
|
||||||
const puppeteer = require('puppeteer');
|
const puppeteer = require("puppeteer");
|
||||||
const urlExist = require('url-exist');
|
const urlExist = require("url-exist");
|
||||||
|
|
||||||
// Modules from file
|
// Modules from file
|
||||||
const shared = require('./shared.js');
|
const shared = require("./shared.js");
|
||||||
const selectors = require('./costants/css-selectors.js');
|
const selectors = require("./costants/css-selectors.js");
|
||||||
const { preparePage } = require('./puppeteer-helper.js');
|
const { preparePage } = require("./puppeteer-helper.js");
|
||||||
const GameDownload = require('./classes/game-download.js');
|
const GameDownload = require("./classes/game-download.js");
|
||||||
const GameInfo = require('./classes/game-info.js');
|
const GameInfo = require("./classes/game-info.js");
|
||||||
const { isStringAValidURL, isF95URL } = require('./urls-helper.js');
|
const { isStringAValidURL, isF95URL } = require("./urls-helper.js");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @protected
|
* @protected
|
||||||
|
@ -20,18 +20,18 @@ const { isStringAValidURL, isF95URL } = require('./urls-helper.js');
|
||||||
* @param {URL} url URL of the game/mod to extract data from
|
* @param {URL} url URL of the game/mod to extract data from
|
||||||
* @return {Promise<GameInfo>} Complete information about the game you are looking for
|
* @return {Promise<GameInfo>} Complete information about the game you are looking for
|
||||||
*/
|
*/
|
||||||
module.exports.getGameInfo = async function(browser, url) {
|
module.exports.getGameInfo = async function (browser, url) {
|
||||||
if (shared.debug) console.log('Obtaining game info');
|
if (shared.debug) console.log("Obtaining game info");
|
||||||
|
|
||||||
// Verify the correctness of the URL
|
// Verify the correctness of the URL
|
||||||
if (!isF95URL(url)) throw url + ' is not a valid F95Zone URL';
|
if (!isF95URL(url)) throw url + " is not a valid F95Zone URL";
|
||||||
let exists = await urlExist(url.toString());
|
let exists = await urlExist(url.toString());
|
||||||
if (!exists) return new GameInfo();
|
if (!exists) return new GameInfo();
|
||||||
|
|
||||||
let page = await preparePage(browser); // Set new isolated page
|
let page = await preparePage(browser); // Set new isolated page
|
||||||
await page.setCookie(...shared.cookies); // Set cookies to avoid login
|
await page.setCookie(...shared.cookies); // Set cookies to avoid login
|
||||||
await page.goto(url.toString(), {
|
await page.goto(url.toString(), {
|
||||||
waitUntil: shared.WAIT_STATEMENT
|
waitUntil: shared.WAIT_STATEMENT,
|
||||||
}); // Go to the game page and wait until it loads
|
}); // Go to the game page and wait until it loads
|
||||||
|
|
||||||
// It asynchronously searches for the elements and
|
// It asynchronously searches for the elements and
|
||||||
|
@ -53,15 +53,19 @@ module.exports.getGameInfo = async function(browser, url) {
|
||||||
info.overview = overview;
|
info.overview = overview;
|
||||||
info.tags = await tags;
|
info.tags = await tags;
|
||||||
info.f95url = url;
|
info.f95url = url;
|
||||||
info.version = info.isMod ? parsedInfos['MOD VERSION'] : parsedInfos['VERSION'];
|
info.version = info.isMod
|
||||||
info.lastUpdate = info.isMod ? parsedInfos['UPDATED'] : parsedInfos['THREAD UPDATED'];
|
? parsedInfos["MOD VERSION"]
|
||||||
|
: parsedInfos["VERSION"];
|
||||||
|
info.lastUpdate = info.isMod
|
||||||
|
? parsedInfos["UPDATED"]
|
||||||
|
: parsedInfos["THREAD UPDATED"];
|
||||||
info.previewSource = await previewSource;
|
info.previewSource = await previewSource;
|
||||||
info.downloadInfo = await downloadData;
|
info.downloadInfo = await downloadData;
|
||||||
|
|
||||||
await page.close(); // Close the page
|
await page.close(); // Close the page
|
||||||
if (shared.debug) console.log('Founded data for ' + info.name);
|
if (shared.debug) console.log("Founded data for " + info.name);
|
||||||
return info;
|
return info;
|
||||||
}
|
};
|
||||||
|
|
||||||
//#region Private methods
|
//#region Private methods
|
||||||
/**
|
/**
|
||||||
|
@ -75,9 +79,9 @@ module.exports.getGameInfo = async function(browser, url) {
|
||||||
function getOverview(text, isMod) {
  // Purpose: cut the overview paragraph out of the structured text of the
  // main post and return it without the "Overview:" header line.
  //   text  - structured (plain) text of the main post
  //   isMod - true when the thread describes a mod, false for a game
  // Returns the trimmed overview string.

  // The overview ends where the update line starts; that line is labelled
  // differently for mods and for games
  const endMarker = isMod ? "Updated" : "Thread Updated";
  const markerIndex = text.indexOf(endMarker);

  // indexOf returns -1 when the marker is absent and substring(0, -1)
  // would then produce an empty string, so fall back to the whole text
  const overviewEndIndex = markerIndex === -1 ? text.length : markerIndex;

  return text.substring(0, overviewEndIndex).replace("Overview:\n", "").trim();
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -88,12 +92,15 @@ function getOverview(text, isMod) {
|
||||||
* @returns {Promise<String>} Structured text
|
* @returns {Promise<String>} Structured text
|
||||||
*/
|
*/
|
||||||
async function getMainPostStructuredText(page) {
  // Purpose: return the structured (plain) text of the first post of the
  // thread, which is where all the game's information is listed.
  //   page - Puppeteer page already opened on the thread
  // Throws an Error when the page contains no post at all.

  // Only the first post is needed, so ask for a single handle instead of
  // collecting every post on the page
  const post = await page.$(selectors.THREAD_POSTS);
  if (!post) {
    // Fail with an explicit message instead of an opaque TypeError raised
    // inside the browser context when reading innerHTML of nothing
    throw new Error("Unable to find the main post of the thread");
  }

  // The info are plain text so we need to parse the HTML code
  const bodyHTML = await page.evaluate(
    /* istanbul ignore next */ (mainPost) => mainPost.innerHTML,
    post
  );
  return HTMLParser.parse(bodyHTML).structuredText;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -104,16 +111,18 @@ return HTMLParser.parse(bodyHTML).structuredText;
|
||||||
*/
|
*/
|
||||||
async function getGameAuthor(page) {
  // Purpose: extract the author of the game/mod from the thread title.
  //   page - Puppeteer page already opened on the thread
  // Returns the text inside the last pair of square brackets of the title,
  // or an empty string when the title contains no square brackets.

  // Read the raw HTML of the title element
  const titleHTML = await page.evaluate(
    /* istanbul ignore next */ (selector) =>
      document.querySelector(selector).innerHTML,
    selectors.GAME_TITLE
  );
  const structuredTitle = HTMLParser.parse(titleHTML);

  // The last element **should be** the title without prefixes (engines, status, other...)
  // Read it by index so the parsed tree is not mutated
  const { childNodes } = structuredTitle;
  const gameTitle = childNodes[childNodes.length - 1].rawText.trim();

  // The last square brackets contain the author
  const openIndex = gameTitle.lastIndexOf("[");
  if (openIndex === -1) return ""; // No brackets: nothing to extract

  // Stop at the matching closing bracket instead of assuming that it is
  // the very last character of the title
  const closeIndex = gameTitle.indexOf("]", openIndex);
  const endIndex = closeIndex === -1 ? gameTitle.length : closeIndex;
  return gameTitle.substring(openIndex + 1, endIndex).trim();
}
|
||||||
|
|
||||||
|
@ -128,18 +137,17 @@ function parseConversationPage(text) {
|
||||||
let dataPairs = {};
|
let dataPairs = {};
|
||||||
|
|
||||||
// The information searched in the game post are one per line
|
// The information searched in the game post are one per line
|
||||||
let splittedText = text.split('\n');
|
let splittedText = text.split("\n");
|
||||||
for (let line of splittedText) {
|
for (let line of splittedText) {
|
||||||
|
if (!line.includes(":")) continue;
|
||||||
if (!line.includes(':')) continue;
|
|
||||||
|
|
||||||
// Create pair key/value
|
// Create pair key/value
|
||||||
let splitted = line.split(':');
|
let splitted = line.split(":");
|
||||||
let key = splitted[0].trim().toUpperCase(); // Uppercase to avoid mismatch
|
let key = splitted[0].trim().toUpperCase(); // Uppercase to avoid mismatch
|
||||||
let value = splitted[1].trim();
|
let value = splitted[1].trim();
|
||||||
|
|
||||||
// Add pair to the dict if valid
|
// Add pair to the dict if valid
|
||||||
if (value != '') dataPairs[key] = value;
|
if (value != "") dataPairs[key] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
return dataPairs;
|
return dataPairs;
|
||||||
|
@ -152,13 +160,16 @@ function parseConversationPage(text) {
|
||||||
* @returns {Promise<URL>} URL of the image or null if failed to get it
|
* @returns {Promise<URL>} URL of the image or null if failed to get it
|
||||||
*/
|
*/
|
||||||
async function getGamePreviewSource(page) {
|
async function getGamePreviewSource(page) {
|
||||||
let src = await page.evaluate( /* istanbul ignore next */ (selector) => {
|
let src = await page.evaluate(
|
||||||
|
/* istanbul ignore next */ (selector) => {
|
||||||
// Get the firs image available
|
// Get the firs image available
|
||||||
let img = document.querySelector(selector);
|
let img = document.querySelector(selector);
|
||||||
|
|
||||||
if (img) return img.getAttribute('src');
|
if (img) return img.getAttribute("src");
|
||||||
else return null;
|
else return null;
|
||||||
}, selectors.GAME_IMAGES);
|
},
|
||||||
|
selectors.GAME_IMAGES
|
||||||
|
);
|
||||||
|
|
||||||
// Check if the URL is valid
|
// Check if the URL is valid
|
||||||
return isStringAValidURL(src) ? new URL(src) : null;
|
return isStringAValidURL(src) ? new URL(src) : null;
|
||||||
|
@ -172,14 +183,16 @@ async function getGamePreviewSource(page) {
|
||||||
*/
|
*/
|
||||||
async function getGameTitle(page) {
  // Purpose: extract the name of the game/mod from the thread title.
  //   page - Puppeteer page already opened on the thread
  // Returns the title text that precedes the first square bracket, or the
  // whole title when it contains no square brackets.

  // Read the raw HTML of the title element
  const titleHTML = await page.evaluate(
    /* istanbul ignore next */ (selector) =>
      document.querySelector(selector).innerHTML,
    selectors.GAME_TITLE
  );
  const structuredTitle = HTMLParser.parse(titleHTML);

  // The last element **should be** the title without prefixes (engines, status, other...)
  // Read it by index so the parsed tree is not mutated
  const { childNodes } = structuredTitle;
  const gameTitle = childNodes[childNodes.length - 1].rawText;

  // indexOf returns -1 when there is no bracket and substring(0, -1) would
  // then return an empty string, so keep the whole title in that case
  const bracketIndex = gameTitle.indexOf("[");
  const endTitleIndex = bracketIndex === -1 ? gameTitle.length : bracketIndex;
  return gameTitle.substring(0, endTitleIndex).trim();
}
|
||||||
|
|
||||||
|
@ -194,7 +207,10 @@ async function getGameTags(page) {
|
||||||
|
|
||||||
// Get the game tags
|
// Get the game tags
|
||||||
for (let handle of await page.$$(selectors.GAME_TAGS)) {
|
for (let handle of await page.$$(selectors.GAME_TAGS)) {
|
||||||
let tag = await page.evaluate( /* istanbul ignore next */ (element) => element.innerText, handle);
|
let tag = await page.evaluate(
|
||||||
|
/* istanbul ignore next */ (element) => element.innerText,
|
||||||
|
handle
|
||||||
|
);
|
||||||
tags.push(tag.toUpperCase());
|
tags.push(tag.toUpperCase());
|
||||||
}
|
}
|
||||||
return tags.sort();
|
return tags.sort();
|
||||||
|
@ -209,20 +225,22 @@ async function getGameTags(page) {
|
||||||
* @returns {Promise<GameInfo>} GameInfo object passed in to which the identified information has been added
|
* @returns {Promise<GameInfo>} GameInfo object passed in to which the identified information has been added
|
||||||
*/
|
*/
|
||||||
async function parsePrefixes(page, info) {
|
async function parsePrefixes(page, info) {
|
||||||
const MOD_PREFIX = 'MOD';
|
const MOD_PREFIX = "MOD";
|
||||||
|
|
||||||
// The 'Ongoing' status is not specified, only 'Abandoned'/'OnHold'/'Complete'
|
// The 'Ongoing' status is not specified, only 'Abandoned'/'OnHold'/'Complete'
|
||||||
info.status = 'Ongoing';
|
info.status = "Ongoing";
|
||||||
for (let handle of await page.$$(selectors.GAME_TITLE_PREFIXES)) {
|
for (let handle of await page.$$(selectors.GAME_TITLE_PREFIXES)) {
|
||||||
let value = await page.evaluate( /* istanbul ignore next */ (element) => element.innerText, handle);
|
let value = await page.evaluate(
|
||||||
|
/* istanbul ignore next */ (element) => element.innerText,
|
||||||
|
handle
|
||||||
|
);
|
||||||
|
|
||||||
// Clean the prefix
|
// Clean the prefix
|
||||||
let prefix = value.toUpperCase().replace('[', '').replace(']', '').trim();
|
let prefix = value.toUpperCase().replace("[", "").replace("]", "").trim();
|
||||||
|
|
||||||
// Getting infos...
|
// Getting infos...
|
||||||
if (shared.statuses.includes(prefix)) info.status = prefix;
|
if (shared.statuses.includes(prefix)) info.status = prefix;
|
||||||
else if (shared.engines.includes(prefix)) info.engine = prefix;
|
else if (shared.engines.includes(prefix)) info.engine = prefix;
|
||||||
|
|
||||||
// This is not a game but a mod
|
// This is not a game but a mod
|
||||||
else if (prefix === MOD_PREFIX) info.isMod = true;
|
else if (prefix === MOD_PREFIX) info.isMod = true;
|
||||||
}
|
}
|
||||||
|
@ -237,39 +255,57 @@ async function parsePrefixes(page, info) {
|
||||||
*/
|
*/
|
||||||
async function getGameDownloadLink(page) {
|
async function getGameDownloadLink(page) {
|
||||||
// Most used hosting platforms
|
// Most used hosting platforms
|
||||||
let hostingPlatforms = ['MEGA', 'NOPY', 'FILESUPLOAD', 'MIXDROP', 'UPLOADHAVEN', 'PIXELDRAIN', 'FILESFM'];
|
let hostingPlatforms = [
|
||||||
|
"MEGA",
|
||||||
|
"NOPY",
|
||||||
|
"FILESUPLOAD",
|
||||||
|
"MIXDROP",
|
||||||
|
"UPLOADHAVEN",
|
||||||
|
"PIXELDRAIN",
|
||||||
|
"FILESFM",
|
||||||
|
];
|
||||||
|
|
||||||
// Supported OS platforms
|
// Supported OS platforms
|
||||||
let platformOS = ['WIN', 'LINUX', 'MAC', 'ALL']
|
let platformOS = ["WIN", "LINUX", "MAC", "ALL"];
|
||||||
|
|
||||||
// Gets the <span> which contains the download links
|
// Gets the <span> which contains the download links
|
||||||
let temp = await page.$$(selectors.DOWNLOAD_LINKS_CONTAINER);
|
let temp = await page.$$(selectors.DOWNLOAD_LINKS_CONTAINER);
|
||||||
if(temp.length === 0) return [];
|
if (temp.length === 0) return [];
|
||||||
|
|
||||||
// Look for the container that contains the links
|
// Look for the container that contains the links
|
||||||
// It is necessary because the same css selector
|
// It is necessary because the same css selector
|
||||||
// also identifies other elements on the page
|
// also identifies other elements on the page
|
||||||
let container = null;
|
let container = null;
|
||||||
for(let candidate of temp) {
|
for (let candidate of temp) {
|
||||||
if (container !== null) break;
|
if (container !== null) break;
|
||||||
let upperText = (await page.evaluate( /* istanbul ignore next */ (e) => e.innerText, candidate)).toUpperCase();
|
let upperText = (
|
||||||
|
await page.evaluate(
|
||||||
|
/* istanbul ignore next */ (e) => e.innerText,
|
||||||
|
candidate
|
||||||
|
)
|
||||||
|
).toUpperCase();
|
||||||
|
|
||||||
// Search if the container contains the name of a hosting platform
|
// Search if the container contains the name of a hosting platform
|
||||||
for (let p of hostingPlatforms) {
|
for (let p of hostingPlatforms) {
|
||||||
if(upperText.includes(p)) {
|
if (upperText.includes(p)) {
|
||||||
container = candidate;
|
container = candidate;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(container === null) return [];
|
if (container === null) return [];
|
||||||
|
|
||||||
// Extract the HTML text from the container
|
// Extract the HTML text from the container
|
||||||
let searchText = (await page.evaluate( /* istanbul ignore next */ (e) => e.innerHTML, container)).toLowerCase();
|
let searchText = (
|
||||||
|
await page.evaluate(
|
||||||
|
/* istanbul ignore next */ (e) => e.innerHTML,
|
||||||
|
container
|
||||||
|
)
|
||||||
|
).toLowerCase();
|
||||||
|
|
||||||
// Parse the download links
|
// Parse the download links
|
||||||
let downloadData = [];
|
let downloadData = [];
|
||||||
for(let platform of platformOS) {
|
for (let platform of platformOS) {
|
||||||
let data = extractGameHostingData(platform, searchText);
|
let data = extractGameHostingData(platform, searchText);
|
||||||
downloadData.push(...data);
|
downloadData.push(...data);
|
||||||
}
|
}
|
||||||
|
@ -285,34 +321,33 @@ async function getGameDownloadLink(page) {
|
||||||
* @returns {GameDownload[]} List of game download links for the selected platform
|
* @returns {GameDownload[]} List of game download links for the selected platform
|
||||||
*/
|
*/
|
||||||
function extractGameHostingData(platform, text) {
|
function extractGameHostingData(platform, text) {
|
||||||
const PLATFORM_BOLD_OPEN = '<b>';
|
const PLATFORM_BOLD_OPEN = "<b>";
|
||||||
const CONTAINER_SPAN_CLOSE = '</span>';
|
const CONTAINER_SPAN_CLOSE = "</span>";
|
||||||
const LINK_OPEN = '<a';
|
const LINK_OPEN = "<a";
|
||||||
const LINK_CLOSE = '</a>';
|
const LINK_CLOSE = "</a>";
|
||||||
const HREF_START = 'href="';
|
const HREF_START = 'href="';
|
||||||
const HREF_END = '"';
|
const HREF_END = '"';
|
||||||
const TAG_CLOSE = '>';
|
const TAG_CLOSE = ">";
|
||||||
|
|
||||||
// Identify the individual platforms
|
// Identify the individual platforms
|
||||||
let startIndex = text.indexOf(platform.toLowerCase());
|
let startIndex = text.indexOf(platform.toLowerCase());
|
||||||
if(startIndex === -1) return [];
|
if (startIndex === -1) return [];
|
||||||
else startIndex += platform.length
|
else startIndex += platform.length;
|
||||||
|
|
||||||
// Find the <b>platform</b>
|
// Find the <b>platform</b>
|
||||||
let endIndex = text.indexOf(
|
let endIndex =
|
||||||
PLATFORM_BOLD_OPEN,
|
text.indexOf(PLATFORM_BOLD_OPEN, startIndex) + PLATFORM_BOLD_OPEN.length;
|
||||||
startIndex) + PLATFORM_BOLD_OPEN.length;
|
|
||||||
|
|
||||||
// Find the end of the container
|
// Find the end of the container
|
||||||
if (endIndex === -1) text.indexOf(
|
if (endIndex === -1)
|
||||||
CONTAINER_SPAN_CLOSE,
|
text.indexOf(CONTAINER_SPAN_CLOSE, startIndex) +
|
||||||
startIndex) + CONTAINER_SPAN_CLOSE.length;
|
CONTAINER_SPAN_CLOSE.length;
|
||||||
|
|
||||||
text = text.substring(startIndex, endIndex);
|
text = text.substring(startIndex, endIndex);
|
||||||
|
|
||||||
let downloadData = [];
|
let downloadData = [];
|
||||||
let linkTags = text.split(LINK_OPEN);
|
let linkTags = text.split(LINK_OPEN);
|
||||||
for(let tag of linkTags) {
|
for (let tag of linkTags) {
|
||||||
// Ignore non-link string
|
// Ignore non-link string
|
||||||
if (!tag.includes(HREF_START)) continue;
|
if (!tag.includes(HREF_START)) continue;
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,11 @@
|
||||||
const { debug, login, getGameData, loadF95BaseData, getUserData, logout } = require("../app/index");
|
const {
|
||||||
|
debug,
|
||||||
|
login,
|
||||||
|
getGameData,
|
||||||
|
loadF95BaseData,
|
||||||
|
getUserData,
|
||||||
|
logout,
|
||||||
|
} = require("../app/index");
|
||||||
|
|
||||||
//debug(true);
|
//debug(true);
|
||||||
main();
|
main();
|
||||||
|
|
Loading…
Reference in New Issue