F95API/src/scripts/scrape-data/handiwork-parse.ts

318 lines
8.7 KiB
TypeScript

// Copyright (c) 2021 MillenniumEarl
//
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT
"use strict";
// Public modules from npm
import { DateTime } from "luxon";
// Modules from files
import HandiWork from "../classes/handiwork/handiwork";
import Thread from "../classes/mapping/thread";
import { IBasic, TAuthor, TChangelog, TEngine, TExternalPlatform, TStatus } from "../interfaces";
import shared, { TPrefixDict } from "../shared";
import { ILink, IPostElement } from "./post-parse";
/**
 * Builds the handiwork described by a forum thread.
 *
 * `arg` is either the URL of the thread (the thread is then created from the
 * ID found in the URL and fetched) or an already available `Thread`, which is
 * used as it is.
 *
 * If you don't want to specify the object type, use `HandiWork`.
 *
 * @param arg URL of the thread or the thread itself.
 * @returns The collected data, cast to the requested type `T`.
 *
 * @todo It does not currently support assets.
 */
export default async function getHandiworkInformation<T extends IBasic>(
  arg: string | Thread
): Promise<T> {
  // Resolve the thread: reuse the given one or download it from its URL
  let thread: Thread;
  if (typeof arg !== "string") {
    thread = arg;
  } else {
    thread = new Thread(extractIDFromURL(arg));
    await thread.fetch();
  }

  shared.logger.info(`Obtaining handiwork from ${thread.url}`);

  // Copy the thread-level information into the handiwork
  const hw = {
    id: thread.id,
    url: thread.url,
    name: thread.title,
    category: thread.category,
    threadPublishingDate: thread.publication,
    lastThreadUpdate: thread.modified,
    tags: thread.tags,
    rating: thread.rating
  } as HandiWork;
  fillWithPrefixes(hw, thread.prefixes);

  // The remaining data is parsed from the body of the first post
  const firstPost = await thread.getPost(1);
  fillWithPostData(hw, firstPost.body);

  return hw as unknown as T;
}
//#region Private methods
//#region Utilities
/**
 * Reads the numeric ID of a work out of its URL.
 *
 * @param url URL of the thread.
 * @returns The ID, or `-1` when the URL contains no suitable number.
 */
function extractIDFromURL(url: string): number {
  shared.logger.trace("Extracting ID from URL...");

  // Expected shapes:
  //   https://f95zone.to/threads/GAMENAME-VERSION-DEVELOPER.ID/
  //   https://f95zone.to/threads/ID/
  // The lookaheads discard digit runs directly followed by `-` or `.`
  const idPattern = /([0-9]+)(?=\/|\b)(?!-|\.)/;
  const found = idPattern.exec(url);

  return found === null ? -1 : parseInt(found[0], 10);
}
/**
 * Returns a new array with the uppercase version of every string in `a`.
 *
 * The input array is left untouched.
 */
function toUpperCaseArray(a: string[]): string[] {
  return a.map((item) => item.toUpperCase());
}
/**
 * Tells whether `s` is one of the values of the dict `a`.
 *
 * The comparison ignores the case of both `s` and the dict values.
 */
function stringInDict(s: string, a: TPrefixDict): boolean {
  // Compare everything in uppercase to ignore the case
  const candidates = toUpperCaseArray(Object.values(a));
  const wanted = s.toUpperCase();
  return candidates.some((candidate) => candidate === wanted);
}
/**
 * Converts a string to a boolean.
 *
 * `true`/`yes`/`1` give `true` and `false`/`no`/`0` give `false`, ignoring
 * case and surrounding whitespace. Any other string is converted with
 * `Boolean(s)`, i.e. it is `true` unless the original string is empty.
 */
function stringToBoolean(s: string): boolean {
  const normalized = s.toLowerCase().trim();

  switch (normalized) {
    case "true":
    case "yes":
    case "1":
      return true;
    case "false":
    case "no":
    case "0":
      return false;
    default:
      // Unknown term: fall back to the truthiness of the untouched string
      return Boolean(s);
  }
}
/**
 * Looks for the first element whose name equals `name`, ignoring the case.
 *
 * @returns The matching element or `undefined` when there is none.
 */
function getPostElementByName(elements: IPostElement[], name: string): IPostElement | undefined {
  for (const candidate of elements) {
    if (candidate.name.toUpperCase() === name.toUpperCase()) return candidate;
  }
  return undefined;
}
//#endregion Utilities
/**
 * Parses the thread prefixes and stores the result in `hw`.
 *
 * Every prefix is stripped of its first `[` and its first `]` and appended to
 * `hw.prefixes`. A prefix found (case-insensitively) among the known engines,
 * statuses or mod markers additionally sets `hw.engine`, `hw.status` or
 * `hw.mod`.
 *
 * A game without a status prefix is considered in development (`Ongoing`);
 * a status read from the prefixes is always kept.
 *
 * @param hw Handiwork to fill. `hw.category` is read, so it must already be set.
 * @param prefixes Prefixes of the thread, possibly wrapped in square brackets.
 */
function fillWithPrefixes(hw: HandiWork, prefixes: string[]): void {
  shared.logger.trace("Parsing prefixes...");

  // Local variables
  let mod = false;
  let engine: TEngine = null;
  let status: TStatus = null;

  /**
   * Emulated dictionary of mod prefixes.
   */
  const fakeModDict: TPrefixDict = {
    0: "MOD",
    1: "CHEAT MOD"
  };

  // Initialize the array
  hw.prefixes = [];

  for (const item of prefixes) {
    // Remove the square brackets
    const prefix = item.replace("[", "").replace("]", "");

    // Check what the prefix indicates
    if (stringInDict(prefix, shared.prefixes["engines"])) engine = prefix as TEngine;
    else if (stringInDict(prefix, shared.prefixes["statuses"])) status = prefix as TStatus;
    else if (stringInDict(prefix, fakeModDict)) mod = true;

    // Anyway add the prefix to list
    hw.prefixes.push(prefix);
  }

  // If the status is not set, then the game is in development (Ongoing).
  // Only fill the default: never overwrite a status found in the prefixes.
  if (!status && hw.category === "games") status = "Ongoing";

  hw.engine = engine;
  hw.status = status;
  hw.mod = mod;
}
/**
 * Fills a HandiWork object with the data extracted from the elements
 * of the main post of its thread.
 *
 * The properties that get assigned are:
 * `overview`, `os`, `language`, `version`, `installation`,
 * `pages`, `resolution`, `lenght`, `censored`, `genre`,
 * `cover`, `lastRelease`, `authors`, `changelog`.
 *
 * `censored` is assigned only when a censorship element exists and
 * `lastRelease` only when the release date is a valid ISO date.
 *
 * @param hw Handiwork to fill.
 * @param elements Elements parsed from the body of the post.
 */
function fillWithPostData(hw: HandiWork, elements: IPostElement[]) {
  // Text of the element called `name`, `undefined` when it is missing
  const textOf = (name: string) => getPostElementByName(elements, name)?.text;
  // Comma-separated text of the element called `name` as trimmed entries
  const listOf = (name: string) =>
    textOf(name)
      ?.split(",")
      .map((entry) => entry.trim());

  // Plain texts and comma-separated lists
  hw.overview = textOf("overview");
  hw.os = listOf("os");
  hw.language = listOf("language");
  hw.version = textOf("version");
  hw.installation = textOf("installation");
  hw.pages = textOf("pages");
  hw.resolution = listOf("resolution");
  // NOTE(review): the element is looked up with the same spelling as the property
  hw.lenght = textOf("lenght");

  // Censorship, under either of its two names
  const censorship =
    getPostElementByName(elements, "censored") || getPostElementByName(elements, "censorship");
  if (censorship) hw.censored = stringToBoolean(censorship.text);

  // Genres, without the empty entries
  hw.genre = listOf("genre")?.filter((entry) => entry !== "");

  // The cover is the first image of the post
  const firstImage = elements.find((e) => e.type === "Image") as ILink;
  hw.cover = firstImage?.href;

  // Release date, kept only when it is a valid ISO date
  const releaseDate = textOf("release date");
  if (DateTime.fromISO(releaseDate).isValid) hw.lastRelease = new Date(releaseDate);

  // Authors and changelog have their own parsers
  hw.authors = parseAuthor(elements);
  hw.changelog = parseChangelog(elements);
}
/**
 * Parses the author from the post's data.
 *
 * The author is read from the first element found among `developer`,
 * `developer/publisher` and `artist`. Every child of that element that
 * has a link becomes one of the author's platforms.
 *
 * @returns An array with exactly one author; its name is empty and it has
 * no platforms when none of the elements above exists.
 */
function parseAuthor(elements: IPostElement[]): TAuthor[] {
  const source =
    getPostElementByName(elements, "developer") ||
    getPostElementByName(elements, "developer/publisher") ||
    getPostElementByName(elements, "artist");

  // No author element: report a single nameless author
  if (!source) return [{ name: "", platforms: [] }];

  // Only the children carrying a link describe a platform
  const platforms: TExternalPlatform[] = source.content
    .filter((e: ILink) => Boolean(e.href))
    .map((e: ILink) => ({
      name: e.text,
      link: e.href
    }));

  return [{ name: source.text, platforms }];
}
/**
 * Parses the changelog from the post's data.
 *
 * The changelog is read from the `changelog` (or `change-log`) element.
 * Each of its `Text` children matching the version pattern (`v1.2...`)
 * opens a new entry that lasts until the next version tag. The children in
 * between become the information of that entry: `Generic` and `Spoiler`
 * children contribute the texts of their own children, any other child
 * contributes its text.
 *
 * Children placed before the first version tag are ignored.
 *
 * @returns One entry per version tag, in the order of the post; an empty
 * array when there is no changelog element.
 */
function parseChangelog(elements: IPostElement[]): TChangelog[] {
  const changelogElement =
    getPostElementByName(elements, "changelog") || getPostElementByName(elements, "change-log");
  if (!changelogElement) return [];

  // regex used to match version tags
  const versionRegex = /^v[0-9]+\.[0-9]+.*/;
  const content = changelogElement.content;

  // Collect the positions of the version tags in a single pass. Using the
  // position given by the iteration (instead of `indexOf`) keeps this linear
  // and stays correct when the same element object occurs more than once.
  const versionIndexes: number[] = [];
  content.forEach((e, index) => {
    if (e.type === "Text" && versionRegex.test(e.text)) versionIndexes.push(index);
  });

  return versionIndexes.map((start, position) => {
    // The group of a version ends where the next version tag begins
    const end = versionIndexes[position + 1] ?? content.length;
    const [header, ...details] = content.slice(start, end);

    const information: string[] = [];
    for (const e of details) {
      if (e.type === "Generic" || e.type === "Spoiler") {
        // Containers contribute the text of each of their children
        information.push(...e.content.map((c) => c.text));
      } else information.push(e.text);
    }

    // Drop the leading "v" of the tag
    return { version: header.text.replace("v", "").trim(), information };
  });
}
//#endregion Private methods