Merge branch 'master' of https://github.com/MillenniumEarl/F95API
						commit
						8a2fd49308
					
				| 
						 | 
				
			
			@ -1,4 +1,7 @@
 | 
			
		|||
# F95API
 | 
			
		||||
 | 
			
		||||
Unofficial Node.js module for scraping the F95Zone platform
 | 
			
		||||
 | 
			
		||||
[FOSSA Status](https://app.fossa.com/projects/git%2Bgithub.com%2FMillenniumEarl%2FF95API?ref=badge_shield)
 | 
			
		||||
[DeepSource](https://deepsource.io/gh/MillenniumEarl/F95API/?ref=repository-badge)
 | 
			
		||||
[](undefined)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,17 +1,17 @@
 | 
			
		|||
'use strict';
 | 
			
		||||
"use strict";
 | 
			
		||||
 | 
			
		||||
// Public modules from npm
 | 
			
		||||
const HTMLParser = require('node-html-parser');
 | 
			
		||||
const puppeteer = require('puppeteer');
 | 
			
		||||
const urlExist = require('url-exist');
 | 
			
		||||
const HTMLParser = require("node-html-parser");
 | 
			
		||||
const puppeteer = require("puppeteer");
 | 
			
		||||
const urlExist = require("url-exist");
 | 
			
		||||
 | 
			
		||||
// Modules from file
 | 
			
		||||
const shared = require('./shared.js');
 | 
			
		||||
const selectors = require('./costants/css-selectors.js');
 | 
			
		||||
const { preparePage } = require('./puppeteer-helper.js');
 | 
			
		||||
const GameDownload = require('./classes/game-download.js');
 | 
			
		||||
const GameInfo = require('./classes/game-info.js');
 | 
			
		||||
const { isStringAValidURL, isF95URL } = require('./urls-helper.js');
 | 
			
		||||
const shared = require("./shared.js");
 | 
			
		||||
const selectors = require("./costants/css-selectors.js");
 | 
			
		||||
const { preparePage } = require("./puppeteer-helper.js");
 | 
			
		||||
const GameDownload = require("./classes/game-download.js");
 | 
			
		||||
const GameInfo = require("./classes/game-info.js");
 | 
			
		||||
const { isStringAValidURL, isF95URL } = require("./urls-helper.js");
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * @protected
 | 
			
		||||
| 
						 | 
				
			
			@ -21,17 +21,17 @@ const { isStringAValidURL, isF95URL } = require('./urls-helper.js');
 | 
			
		|||
 * @return {Promise<GameInfo>} Complete information about the game you are looking for
 | 
			
		||||
 */
 | 
			
		||||
module.exports.getGameInfo = async function (browser, url) {
 | 
			
		||||
    if (shared.debug) console.log('Obtaining game info');
 | 
			
		||||
  if (shared.debug) console.log("Obtaining game info");
 | 
			
		||||
 | 
			
		||||
  // Verify the correctness of the URL
 | 
			
		||||
    if (!isF95URL(url)) throw url + ' is not a valid F95Zone URL';
 | 
			
		||||
  if (!isF95URL(url)) throw url + " is not a valid F95Zone URL";
 | 
			
		||||
  let exists = await urlExist(url.toString());
 | 
			
		||||
  if (!exists) return new GameInfo();
 | 
			
		||||
 | 
			
		||||
  let page = await preparePage(browser); // Set new isolated page
 | 
			
		||||
  await page.setCookie(...shared.cookies); // Set cookies to avoid login
 | 
			
		||||
  await page.goto(url.toString(), {
 | 
			
		||||
        waitUntil: shared.WAIT_STATEMENT
 | 
			
		||||
    waitUntil: shared.WAIT_STATEMENT,
 | 
			
		||||
  }); // Go to the game page and wait until it loads
 | 
			
		||||
 | 
			
		||||
  // It asynchronously searches for the elements and
 | 
			
		||||
| 
						 | 
				
			
			@ -53,15 +53,19 @@ module.exports.getGameInfo = async function(browser, url) {
 | 
			
		|||
  info.overview = overview;
 | 
			
		||||
  info.tags = await tags;
 | 
			
		||||
  info.f95url = url;
 | 
			
		||||
    info.version = info.isMod ? parsedInfos['MOD VERSION'] : parsedInfos['VERSION'];
 | 
			
		||||
    info.lastUpdate = info.isMod ? parsedInfos['UPDATED'] : parsedInfos['THREAD UPDATED'];
 | 
			
		||||
  info.version = info.isMod
 | 
			
		||||
    ? parsedInfos["MOD VERSION"]
 | 
			
		||||
    : parsedInfos["VERSION"];
 | 
			
		||||
  info.lastUpdate = info.isMod
 | 
			
		||||
    ? parsedInfos["UPDATED"]
 | 
			
		||||
    : parsedInfos["THREAD UPDATED"];
 | 
			
		||||
  info.previewSource = await previewSource;
 | 
			
		||||
  info.downloadInfo = await downloadData;
 | 
			
		||||
 | 
			
		||||
  await page.close(); // Close the page
 | 
			
		||||
    if (shared.debug) console.log('Founded data for ' + info.name);
 | 
			
		||||
  if (shared.debug) console.log("Founded data for " + info.name);
 | 
			
		||||
  return info;
 | 
			
		||||
}
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
//#region Private methods
 | 
			
		||||
/**
 | 
			
		||||
| 
						 | 
				
			
			@ -75,9 +79,9 @@ module.exports.getGameInfo = async function(browser, url) {
 | 
			
		|||
function getOverview(text, isMod) {
  // The overview is everything that precedes the "last update" line, whose
  // label differs between mods ("Updated") and games ("Thread Updated").
  const endMarker = isMod ? "Updated" : "Thread Updated";
  const markerIndex = text.indexOf(endMarker);

  // indexOf() yields -1 when the marker is absent; substring(0, -1) would then
  // silently produce an empty overview, so fall back to the end of the text.
  const overviewEndIndex = markerIndex === -1 ? text.length : markerIndex;

  // Strip the "Overview:" heading and surrounding whitespace
  return text.substring(0, overviewEndIndex).replace("Overview:\n", "").trim();
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
| 
						 | 
				
			
			@ -92,7 +96,10 @@ async function getMainPostStructuredText(page) {
 | 
			
		|||
  let post = (await page.$$(selectors.THREAD_POSTS))[0];
 | 
			
		||||
 | 
			
		||||
  // The info are plain text so we need to parse the HTML code
 | 
			
		||||
let bodyHTML = await page.evaluate( /* istanbul ignore next */ (mainPost) => mainPost.innerHTML, post);
 | 
			
		||||
  let bodyHTML = await page.evaluate(
 | 
			
		||||
    /* istanbul ignore next */ (mainPost) => mainPost.innerHTML,
 | 
			
		||||
    post
 | 
			
		||||
  );
 | 
			
		||||
  return HTMLParser.parse(bodyHTML).structuredText;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -104,16 +111,18 @@ return HTMLParser.parse(bodyHTML).structuredText;
 | 
			
		|||
 */
 | 
			
		||||
async function getGameAuthor(page) {
  // Read the raw HTML of the thread title element (selectors.GAME_TITLE)
  const titleHTML = await page.evaluate(
    /* istanbul ignore next */ (selector) =>
      document.querySelector(selector).innerHTML,
    selectors.GAME_TITLE
  );
  const structuredTitle = HTMLParser.parse(titleHTML);

  // The last element **should be** the title without prefixes (engines, status, other...)
  const gameTitle = structuredTitle.childNodes.pop().rawText;

  // The last square brackets contain the author
  const openIndex = gameTitle.lastIndexOf("[");

  // No brackets at all: there is no author to extract (previously this
  // returned the title with its last character cut off)
  if (openIndex === -1) return "";

  // Stop at the matching "]" instead of assuming it is the very last
  // character, so trailing whitespace/text does not leak into the result
  const closeIndex = gameTitle.indexOf("]", openIndex);
  const endIndex = closeIndex === -1 ? gameTitle.length : closeIndex;

  return gameTitle.substring(openIndex + 1, endIndex).trim();
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -128,18 +137,17 @@ function parseConversationPage(text) {
 | 
			
		|||
  let dataPairs = {};
 | 
			
		||||
 | 
			
		||||
  // The information searched in the game post are one per line
 | 
			
		||||
    let splittedText = text.split('\n');
 | 
			
		||||
  let splittedText = text.split("\n");
 | 
			
		||||
  for (let line of splittedText) {
 | 
			
		||||
 | 
			
		||||
        if (!line.includes(':')) continue;
 | 
			
		||||
    if (!line.includes(":")) continue;
 | 
			
		||||
 | 
			
		||||
    // Create pair key/value
 | 
			
		||||
        let splitted = line.split(':');
 | 
			
		||||
    let splitted = line.split(":");
 | 
			
		||||
    let key = splitted[0].trim().toUpperCase(); // Uppercase to avoid mismatch
 | 
			
		||||
    let value = splitted[1].trim();
 | 
			
		||||
 | 
			
		||||
    // Add pair to the dict if valid
 | 
			
		||||
        if (value != '') dataPairs[key] = value;
 | 
			
		||||
    if (value != "") dataPairs[key] = value;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return dataPairs;
 | 
			
		||||
| 
						 | 
				
			
			@ -152,13 +160,16 @@ function parseConversationPage(text) {
 | 
			
		|||
 * @returns {Promise<URL>} URL of the image or null if failed to get it
 | 
			
		||||
 */
 | 
			
		||||
async function getGamePreviewSource(page) {
 | 
			
		||||
    let src = await page.evaluate( /* istanbul ignore next */ (selector) => {
 | 
			
		||||
  let src = await page.evaluate(
 | 
			
		||||
    /* istanbul ignore next */ (selector) => {
 | 
			
		||||
      // Get the firs image available
 | 
			
		||||
      let img = document.querySelector(selector);
 | 
			
		||||
 | 
			
		||||
        if (img) return img.getAttribute('src');
 | 
			
		||||
      if (img) return img.getAttribute("src");
 | 
			
		||||
      else return null;
 | 
			
		||||
    }, selectors.GAME_IMAGES);
 | 
			
		||||
    },
 | 
			
		||||
    selectors.GAME_IMAGES
 | 
			
		||||
  );
 | 
			
		||||
 | 
			
		||||
  // Check if the URL is valid
 | 
			
		||||
  return isStringAValidURL(src) ? new URL(src) : null;
 | 
			
		||||
| 
						 | 
				
			
			@ -172,14 +183,16 @@ async function getGamePreviewSource(page) {
 | 
			
		|||
 */
 | 
			
		||||
async function getGameTitle(page) {
  // Get the game/mod name (without square brackets)
  const titleHTML = await page.evaluate(
    /* istanbul ignore next */ (selector) =>
      document.querySelector(selector).innerHTML,
    selectors.GAME_TITLE
  );
  const structuredTitle = HTMLParser.parse(titleHTML);

  // The last element **should be** the title without prefixes (engines, status, other...)
  const gameTitle = structuredTitle.childNodes.pop().rawText;

  // The name ends where the first "[...]" group (version/author) begins.
  // indexOf() yields -1 when the title has no brackets; substring(0, -1)
  // would then return an empty name, so keep the whole title instead.
  const bracketIndex = gameTitle.indexOf("[");
  const endTitleIndex = bracketIndex === -1 ? gameTitle.length : bracketIndex;

  return gameTitle.substring(0, endTitleIndex).trim();
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -194,7 +207,10 @@ async function getGameTags(page) {
 | 
			
		|||
 | 
			
		||||
  // Get the game tags
 | 
			
		||||
  for (let handle of await page.$$(selectors.GAME_TAGS)) {
 | 
			
		||||
        let tag = await page.evaluate( /* istanbul ignore next */ (element) => element.innerText, handle);
 | 
			
		||||
    let tag = await page.evaluate(
 | 
			
		||||
      /* istanbul ignore next */ (element) => element.innerText,
 | 
			
		||||
      handle
 | 
			
		||||
    );
 | 
			
		||||
    tags.push(tag.toUpperCase());
 | 
			
		||||
  }
 | 
			
		||||
  return tags.sort();
 | 
			
		||||
| 
						 | 
				
			
			@ -209,20 +225,22 @@ async function getGameTags(page) {
 | 
			
		|||
 * @returns {Promise<GameInfo>} GameInfo object passed in to which the identified information has been added
 | 
			
		||||
 */
 | 
			
		||||
async function parsePrefixes(page, info) {
 | 
			
		||||
    const MOD_PREFIX = 'MOD';
 | 
			
		||||
  const MOD_PREFIX = "MOD";
 | 
			
		||||
 | 
			
		||||
  // The 'Ongoing' status is not specified, only 'Abandoned'/'OnHold'/'Complete'
 | 
			
		||||
    info.status = 'Ongoing';
 | 
			
		||||
  info.status = "Ongoing";
 | 
			
		||||
  for (let handle of await page.$$(selectors.GAME_TITLE_PREFIXES)) {
 | 
			
		||||
        let value = await page.evaluate( /* istanbul ignore next */ (element) => element.innerText, handle);
 | 
			
		||||
    let value = await page.evaluate(
 | 
			
		||||
      /* istanbul ignore next */ (element) => element.innerText,
 | 
			
		||||
      handle
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    // Clean the prefix
 | 
			
		||||
        let prefix = value.toUpperCase().replace('[', '').replace(']', '').trim();
 | 
			
		||||
    let prefix = value.toUpperCase().replace("[", "").replace("]", "").trim();
 | 
			
		||||
 | 
			
		||||
    // Getting infos...
 | 
			
		||||
    if (shared.statuses.includes(prefix)) info.status = prefix;
 | 
			
		||||
    else if (shared.engines.includes(prefix)) info.engine = prefix;
 | 
			
		||||
 | 
			
		||||
    // This is not a game but a mod
 | 
			
		||||
    else if (prefix === MOD_PREFIX) info.isMod = true;
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			@ -237,10 +255,18 @@ async function parsePrefixes(page, info) {
 | 
			
		|||
 */
 | 
			
		||||
async function getGameDownloadLink(page) {
 | 
			
		||||
  // Most used hosting platforms
 | 
			
		||||
    let hostingPlatforms = ['MEGA', 'NOPY', 'FILESUPLOAD', 'MIXDROP', 'UPLOADHAVEN', 'PIXELDRAIN', 'FILESFM'];
 | 
			
		||||
  let hostingPlatforms = [
 | 
			
		||||
    "MEGA",
 | 
			
		||||
    "NOPY",
 | 
			
		||||
    "FILESUPLOAD",
 | 
			
		||||
    "MIXDROP",
 | 
			
		||||
    "UPLOADHAVEN",
 | 
			
		||||
    "PIXELDRAIN",
 | 
			
		||||
    "FILESFM",
 | 
			
		||||
  ];
 | 
			
		||||
 | 
			
		||||
  // Supported OS platforms
 | 
			
		||||
    let platformOS = ['WIN', 'LINUX', 'MAC', 'ALL']
 | 
			
		||||
  let platformOS = ["WIN", "LINUX", "MAC", "ALL"];
 | 
			
		||||
 | 
			
		||||
  // Gets the <span> which contains the download links
 | 
			
		||||
  let temp = await page.$$(selectors.DOWNLOAD_LINKS_CONTAINER);
 | 
			
		||||
| 
						 | 
				
			
			@ -252,7 +278,12 @@ async function getGameDownloadLink(page) {
 | 
			
		|||
  let container = null;
 | 
			
		||||
  for (let candidate of temp) {
 | 
			
		||||
    if (container !== null) break;
 | 
			
		||||
        let upperText = (await page.evaluate( /* istanbul ignore next */ (e) => e.innerText, candidate)).toUpperCase();
 | 
			
		||||
    let upperText = (
 | 
			
		||||
      await page.evaluate(
 | 
			
		||||
        /* istanbul ignore next */ (e) => e.innerText,
 | 
			
		||||
        candidate
 | 
			
		||||
      )
 | 
			
		||||
    ).toUpperCase();
 | 
			
		||||
 | 
			
		||||
    // Search if the container contains the name of a hosting platform
 | 
			
		||||
    for (let p of hostingPlatforms) {
 | 
			
		||||
| 
						 | 
				
			
			@ -265,7 +296,12 @@ async function getGameDownloadLink(page) {
 | 
			
		|||
  if (container === null) return [];
 | 
			
		||||
 | 
			
		||||
  // Extract the HTML text from the container
 | 
			
		||||
    let searchText = (await page.evaluate( /* istanbul ignore next */ (e) => e.innerHTML, container)).toLowerCase();
 | 
			
		||||
  let searchText = (
 | 
			
		||||
    await page.evaluate(
 | 
			
		||||
      /* istanbul ignore next */ (e) => e.innerHTML,
 | 
			
		||||
      container
 | 
			
		||||
    )
 | 
			
		||||
  ).toLowerCase();
 | 
			
		||||
 | 
			
		||||
  // Parse the download links
 | 
			
		||||
  let downloadData = [];
 | 
			
		||||
| 
						 | 
				
			
			@ -285,28 +321,27 @@ async function getGameDownloadLink(page) {
 | 
			
		|||
 * @returns {GameDownload[]} List of game download links for the selected platform
 | 
			
		||||
 */
 | 
			
		||||
function extractGameHostingData(platform, text) {
 | 
			
		||||
    const PLATFORM_BOLD_OPEN = '<b>';
 | 
			
		||||
    const CONTAINER_SPAN_CLOSE = '</span>';
 | 
			
		||||
    const LINK_OPEN = '<a';
 | 
			
		||||
    const LINK_CLOSE = '</a>';
 | 
			
		||||
  const PLATFORM_BOLD_OPEN = "<b>";
 | 
			
		||||
  const CONTAINER_SPAN_CLOSE = "</span>";
 | 
			
		||||
  const LINK_OPEN = "<a";
 | 
			
		||||
  const LINK_CLOSE = "</a>";
 | 
			
		||||
  const HREF_START = 'href="';
 | 
			
		||||
  const HREF_END = '"';
 | 
			
		||||
    const TAG_CLOSE = '>';
 | 
			
		||||
  const TAG_CLOSE = ">";
 | 
			
		||||
 | 
			
		||||
  // Identify the individual platforms
 | 
			
		||||
  let startIndex = text.indexOf(platform.toLowerCase());
 | 
			
		||||
  if (startIndex === -1) return [];
 | 
			
		||||
    else startIndex += platform.length
 | 
			
		||||
  else startIndex += platform.length;
 | 
			
		||||
 | 
			
		||||
  // Find the <b>platform</b>
 | 
			
		||||
    let endIndex = text.indexOf(
 | 
			
		||||
        PLATFORM_BOLD_OPEN,
 | 
			
		||||
        startIndex) + PLATFORM_BOLD_OPEN.length;
 | 
			
		||||
  let endIndex =
 | 
			
		||||
    text.indexOf(PLATFORM_BOLD_OPEN, startIndex) + PLATFORM_BOLD_OPEN.length;
 | 
			
		||||
 | 
			
		||||
  // Find the end of the container
 | 
			
		||||
    if (endIndex === -1) text.indexOf(
 | 
			
		||||
        CONTAINER_SPAN_CLOSE,
 | 
			
		||||
        startIndex) + CONTAINER_SPAN_CLOSE.length;
 | 
			
		||||
  if (endIndex === -1)
 | 
			
		||||
    text.indexOf(CONTAINER_SPAN_CLOSE, startIndex) +
 | 
			
		||||
      CONTAINER_SPAN_CLOSE.length;
 | 
			
		||||
 | 
			
		||||
  text = text.substring(startIndex, endIndex);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,4 +1,11 @@
 | 
			
		|||
const { debug, login, getGameData, loadF95BaseData, getUserData, logout } = require("../app/index");
 | 
			
		||||
const {
 | 
			
		||||
  debug,
 | 
			
		||||
  login,
 | 
			
		||||
  getGameData,
 | 
			
		||||
  loadF95BaseData,
 | 
			
		||||
  getUserData,
 | 
			
		||||
  logout,
 | 
			
		||||
} = require("../app/index");
 | 
			
		||||
 | 
			
		||||
//debug(true);
 | 
			
		||||
main();
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue