commit
b0ce179e73
|
@ -6,7 +6,7 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
// Modules from files
|
// Modules from files
|
||||||
import { TAuthor, IAnimation, TRating, TCategory } from "../../interfaces";
|
import { TAuthor, IAnimation, TRating, TCategory, TChangelog } from "../../interfaces";
|
||||||
|
|
||||||
export default class Animation implements IAnimation {
|
export default class Animation implements IAnimation {
|
||||||
//#region Properties
|
//#region Properties
|
||||||
|
@ -19,7 +19,7 @@ export default class Animation implements IAnimation {
|
||||||
resolution: string[];
|
resolution: string[];
|
||||||
authors: TAuthor[];
|
authors: TAuthor[];
|
||||||
category: TCategory;
|
category: TCategory;
|
||||||
changelog: string[];
|
changelog: TChangelog[];
|
||||||
cover: string;
|
cover: string;
|
||||||
id: number;
|
id: number;
|
||||||
lastThreadUpdate: Date;
|
lastThreadUpdate: Date;
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
// Modules from files
|
// Modules from files
|
||||||
import { TAuthor, IAsset, TRating, TCategory } from "../../interfaces";
|
import { TAuthor, IAsset, TRating, TCategory, TChangelog } from "../../interfaces";
|
||||||
|
|
||||||
export default class Asset implements IAsset {
|
export default class Asset implements IAsset {
|
||||||
//#region Properties
|
//#region Properties
|
||||||
|
@ -18,7 +18,7 @@ export default class Asset implements IAsset {
|
||||||
sku: string;
|
sku: string;
|
||||||
authors: TAuthor[];
|
authors: TAuthor[];
|
||||||
category: TCategory;
|
category: TCategory;
|
||||||
changelog: string[];
|
changelog: TChangelog[];
|
||||||
cover: string;
|
cover: string;
|
||||||
id: number;
|
id: number;
|
||||||
lastThreadUpdate: Date;
|
lastThreadUpdate: Date;
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
// Modules from files
|
// Modules from files
|
||||||
import { TAuthor, IComic, TRating, TCategory } from "../../interfaces";
|
import { TAuthor, IComic, TRating, TCategory, TChangelog } from "../../interfaces";
|
||||||
|
|
||||||
export default class Comic implements IComic {
|
export default class Comic implements IComic {
|
||||||
//#region Properties
|
//#region Properties
|
||||||
|
@ -15,7 +15,7 @@ export default class Comic implements IComic {
|
||||||
resolution: string[];
|
resolution: string[];
|
||||||
authors: TAuthor[];
|
authors: TAuthor[];
|
||||||
category: TCategory;
|
category: TCategory;
|
||||||
changelog: string[];
|
changelog: TChangelog[];
|
||||||
cover: string;
|
cover: string;
|
||||||
id: number;
|
id: number;
|
||||||
lastThreadUpdate: Date;
|
lastThreadUpdate: Date;
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
// Modules from files
|
// Modules from files
|
||||||
import { TAuthor, TEngine, IGame, TRating, TStatus, TCategory } from "../../interfaces";
|
import { TAuthor, TEngine, IGame, TRating, TStatus, TCategory, TChangelog } from "../../interfaces";
|
||||||
|
|
||||||
export default class Game implements IGame {
|
export default class Game implements IGame {
|
||||||
//#region Properties
|
//#region Properties
|
||||||
|
@ -22,7 +22,7 @@ export default class Game implements IGame {
|
||||||
version: string;
|
version: string;
|
||||||
authors: TAuthor[];
|
authors: TAuthor[];
|
||||||
category: TCategory;
|
category: TCategory;
|
||||||
changelog: string[];
|
changelog: TChangelog[];
|
||||||
cover: string;
|
cover: string;
|
||||||
id: number;
|
id: number;
|
||||||
lastThreadUpdate: Date;
|
lastThreadUpdate: Date;
|
||||||
|
|
|
@ -6,7 +6,15 @@
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
// Modules from files
|
// Modules from files
|
||||||
import { TAuthor, TRating, IHandiwork, TEngine, TCategory, TStatus } from "../../interfaces";
|
import {
|
||||||
|
TAuthor,
|
||||||
|
TRating,
|
||||||
|
IHandiwork,
|
||||||
|
TEngine,
|
||||||
|
TCategory,
|
||||||
|
TStatus,
|
||||||
|
TChangelog
|
||||||
|
} from "../../interfaces";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* It represents a generic work, be it a game, a comic, an animation or an asset.
|
* It represents a generic work, be it a game, a comic, an animation or an asset.
|
||||||
|
@ -25,7 +33,7 @@ export default class HandiWork implements IHandiwork {
|
||||||
version: string;
|
version: string;
|
||||||
authors: TAuthor[];
|
authors: TAuthor[];
|
||||||
category: TCategory;
|
category: TCategory;
|
||||||
changelog: string[];
|
changelog: TChangelog[];
|
||||||
cover: string;
|
cover: string;
|
||||||
id: number;
|
id: number;
|
||||||
lastThreadUpdate: Date;
|
lastThreadUpdate: Date;
|
||||||
|
|
|
@ -51,6 +51,20 @@ export type TRating = {
|
||||||
count: number;
|
count: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Information about a single version of the product.
|
||||||
|
*/
|
||||||
|
export type TChangelog = {
|
||||||
|
/**
|
||||||
|
* Product version.
|
||||||
|
*/
|
||||||
|
version: string;
|
||||||
|
/**
|
||||||
|
* Version information.
|
||||||
|
*/
|
||||||
|
information: string[];
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List of possible graphics engines used for game development.
|
* List of possible graphics engines used for game development.
|
||||||
*/
|
*/
|
||||||
|
@ -101,7 +115,7 @@ export interface IBasic {
|
||||||
/**
|
/**
|
||||||
* List of changes of the work for each version.
|
* List of changes of the work for each version.
|
||||||
*/
|
*/
|
||||||
changelog: string[];
|
changelog: TChangelog[];
|
||||||
/**
|
/**
|
||||||
* link to the cover image of the work.
|
* link to the cover image of the work.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -11,7 +11,7 @@ import { DateTime } from "luxon";
|
||||||
// Modules from files
|
// Modules from files
|
||||||
import HandiWork from "../classes/handiwork/handiwork";
|
import HandiWork from "../classes/handiwork/handiwork";
|
||||||
import Thread from "../classes/mapping/thread";
|
import Thread from "../classes/mapping/thread";
|
||||||
import { IBasic, TAuthor, TEngine, TExternalPlatform, TStatus } from "../interfaces";
|
import { IBasic, TAuthor, TChangelog, TEngine, TExternalPlatform, TStatus } from "../interfaces";
|
||||||
import shared, { TPrefixDict } from "../shared";
|
import shared, { TPrefixDict } from "../shared";
|
||||||
import { ILink, IPostElement } from "./post-parse";
|
import { ILink, IPostElement } from "./post-parse";
|
||||||
|
|
||||||
|
@ -193,7 +193,7 @@ function fillWithPostData(hw: HandiWork, elements: IPostElement[]) {
|
||||||
?.text?.split(",")
|
?.text?.split(",")
|
||||||
.map((s) => s.trim());
|
.map((s) => s.trim());
|
||||||
hw.version = getPostElementByName(elements, "version")?.text;
|
hw.version = getPostElementByName(elements, "version")?.text;
|
||||||
hw.installation = getPostElementByName(elements, "installation")?.content.shift()?.text;
|
hw.installation = getPostElementByName(elements, "installation")?.text;
|
||||||
hw.pages = getPostElementByName(elements, "pages")?.text;
|
hw.pages = getPostElementByName(elements, "pages")?.text;
|
||||||
hw.resolution = getPostElementByName(elements, "resolution")
|
hw.resolution = getPostElementByName(elements, "resolution")
|
||||||
?.text?.split(",")
|
?.text?.split(",")
|
||||||
|
@ -206,64 +206,112 @@ function fillWithPostData(hw: HandiWork, elements: IPostElement[]) {
|
||||||
if (censored) hw.censored = stringToBoolean(censored.text);
|
if (censored) hw.censored = stringToBoolean(censored.text);
|
||||||
|
|
||||||
// Get the genres
|
// Get the genres
|
||||||
const genre = getPostElementByName(elements, "genre")?.content.shift()?.text;
|
const genre = getPostElementByName(elements, "genre")?.text;
|
||||||
hw.genre = genre
|
hw.genre = genre
|
||||||
?.split(",")
|
?.split(",")
|
||||||
.map((s) => s.trim())
|
.map((s) => s.trim())
|
||||||
.filter((s) => s !== "");
|
.filter((s) => s !== "");
|
||||||
|
|
||||||
// Get the cover
|
// Get the cover
|
||||||
const cover = getPostElementByName(elements, "overview")?.content.find(
|
const cover = elements.find((e) => e.type === "Image") as ILink;
|
||||||
(el) => el.type === "Image"
|
|
||||||
) as ILink;
|
|
||||||
hw.cover = cover?.href;
|
hw.cover = cover?.href;
|
||||||
|
|
||||||
// Fill the dates
|
// Fill the dates
|
||||||
const releaseDate = getPostElementByName(elements, "release date")?.text;
|
const releaseDate = getPostElementByName(elements, "release date")?.text;
|
||||||
if (DateTime.fromISO(releaseDate).isValid) hw.lastRelease = new Date(releaseDate);
|
if (DateTime.fromISO(releaseDate).isValid) hw.lastRelease = new Date(releaseDate);
|
||||||
|
|
||||||
//#region Convert the author
|
// Get the author
|
||||||
|
hw.authors = parseAuthor(elements);
|
||||||
|
|
||||||
|
// Get the changelog
|
||||||
|
hw.changelog = parseChangelog(elements);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse the author from the post's data.
|
||||||
|
*/
|
||||||
|
function parseAuthor(elements: IPostElement[]): TAuthor[] {
|
||||||
|
// Local variables
|
||||||
|
const author: TAuthor = {
|
||||||
|
name: "",
|
||||||
|
platforms: []
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fetch the authors from the post data
|
||||||
const authorElement =
|
const authorElement =
|
||||||
getPostElementByName(elements, "developer") ||
|
getPostElementByName(elements, "developer") ||
|
||||||
getPostElementByName(elements, "developer/publisher") ||
|
getPostElementByName(elements, "developer/publisher") ||
|
||||||
getPostElementByName(elements, "artist");
|
getPostElementByName(elements, "artist");
|
||||||
const author: TAuthor = {
|
|
||||||
name: authorElement?.text,
|
if (authorElement) {
|
||||||
platforms: []
|
// Set the author name
|
||||||
};
|
author.name = authorElement.text;
|
||||||
|
|
||||||
// Add the found platforms
|
// Add the found platforms
|
||||||
authorElement?.content.forEach((el: ILink, idx) => {
|
authorElement.content.forEach((e: ILink) => {
|
||||||
|
// Ignore invalid links
|
||||||
|
if (e.href) {
|
||||||
|
// Create and push the new platform
|
||||||
const platform: TExternalPlatform = {
|
const platform: TExternalPlatform = {
|
||||||
name: el.text,
|
name: e.text,
|
||||||
link: el.href
|
link: e.href
|
||||||
};
|
};
|
||||||
|
|
||||||
author.platforms.push(platform);
|
author.platforms.push(platform);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
hw.authors = [author];
|
}
|
||||||
//#endregion Convert the author
|
|
||||||
|
|
||||||
//#region Get the changelog
|
return [author];
|
||||||
hw.changelog = [];
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse the changelog from the post's data.
|
||||||
|
*/
|
||||||
|
function parseChangelog(elements: IPostElement[]): TChangelog[] {
|
||||||
|
// Local variables
|
||||||
|
const changelog = [];
|
||||||
const changelogElement =
|
const changelogElement =
|
||||||
getPostElementByName(elements, "changelog") || getPostElementByName(elements, "change-log");
|
getPostElementByName(elements, "changelog") || getPostElementByName(elements, "change-log");
|
||||||
|
|
||||||
if (changelogElement?.content) {
|
if (changelogElement) {
|
||||||
const changelogSpoiler = changelogElement.content.find(
|
// regex used to match version tags
|
||||||
(el) => el.type === "Spoiler" && el.content.length > 0
|
const versionRegex = /^v[0-9]+\.[0-9]+.*/;
|
||||||
);
|
|
||||||
|
|
||||||
// Add to the changelog the single spoilers
|
// Get the indexes of the version tags
|
||||||
const spoilers = changelogSpoiler.content
|
const indexesVersion = changelogElement.content
|
||||||
.filter((e) => e.text.trim() !== "")
|
.filter((e) => e.type === "Text" && versionRegex.test(e.text))
|
||||||
.map((e) => e.text);
|
.map((e) => changelogElement.content.indexOf(e));
|
||||||
hw.changelog.push(...spoilers);
|
|
||||||
|
|
||||||
// Add at the end also the text of the "changelog" element
|
const results = indexesVersion.map((i, j) => {
|
||||||
hw.changelog.push(changelogSpoiler.text);
|
// In-loop variable
|
||||||
|
const versionChangelog: TChangelog = {
|
||||||
|
version: "",
|
||||||
|
information: []
|
||||||
|
};
|
||||||
|
|
||||||
|
// Get the difference in indexes between this and the next version tag
|
||||||
|
const diff = indexesVersion[j + 1] ?? changelogElement.content.length;
|
||||||
|
|
||||||
|
// fetch the group of data of this version tag
|
||||||
|
const group = changelogElement.content.slice(i, diff);
|
||||||
|
versionChangelog.version = group.shift().text.replace("v", "").trim();
|
||||||
|
|
||||||
|
// parse the data
|
||||||
|
group.forEach((e) => {
|
||||||
|
if (e.type === "Generic" || e.type === "Spoiler") {
|
||||||
|
const textes = e.content.map((c) => c.text);
|
||||||
|
versionChangelog.information.push(...textes);
|
||||||
|
} else versionChangelog.information.push(e.text);
|
||||||
|
});
|
||||||
|
|
||||||
|
return versionChangelog;
|
||||||
|
});
|
||||||
|
|
||||||
|
changelog.push(...results);
|
||||||
}
|
}
|
||||||
//#endregion Get the changelog
|
|
||||||
|
return changelog;
|
||||||
}
|
}
|
||||||
|
|
||||||
//#endregion Private methods
|
//#endregion Private methods
|
||||||
|
|
|
@ -8,17 +8,41 @@
|
||||||
// Import from files
|
// Import from files
|
||||||
import { POST } from "../constants/css-selector";
|
import { POST } from "../constants/css-selector";
|
||||||
|
|
||||||
|
// Types
|
||||||
|
type TNodeType = "Text" | "Formatted" | "Spoiler" | "Link" | "List" | "Noscript" | "Unknown";
|
||||||
|
|
||||||
//#region Interfaces
|
//#region Interfaces
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents an element contained in the post.
|
||||||
|
*/
|
||||||
export interface IPostElement {
|
export interface IPostElement {
|
||||||
|
/**
|
||||||
|
* Type of element.
|
||||||
|
*/
|
||||||
type: "Generic" | "Text" | "Link" | "Image" | "Spoiler";
|
type: "Generic" | "Text" | "Link" | "Image" | "Spoiler";
|
||||||
|
/**
|
||||||
|
* Name associated with the element.
|
||||||
|
*/
|
||||||
name: string;
|
name: string;
|
||||||
|
/**
|
||||||
|
* Text of the content of the element excluding any children.
|
||||||
|
*/
|
||||||
text: string;
|
text: string;
|
||||||
|
/**
|
||||||
|
* Children elements contained in this element.
|
||||||
|
*/
|
||||||
content: IPostElement[];
|
content: IPostElement[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a link-type element (a link or an image) in the post.
|
||||||
|
*/
|
||||||
export interface ILink extends IPostElement {
|
export interface ILink extends IPostElement {
|
||||||
type: "Image" | "Link";
|
type: "Image" | "Link";
|
||||||
|
/**
|
||||||
|
* Link to the resource.
|
||||||
|
*/
|
||||||
href: string;
|
href: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,7 +78,7 @@ export function parseF95ThreadPost($: cheerio.Root, post: cheerio.Cheerio): IPos
|
||||||
supernode = removeEmptyContentFromElement(supernode);
|
supernode = removeEmptyContentFromElement(supernode);
|
||||||
|
|
||||||
// Finally parse the elements to create the pairs of title/data
|
// Finally parse the elements to create the pairs of title/data
|
||||||
return associateNameToElements(supernode.content);
|
return pairUpElements(supernode.content);
|
||||||
}
|
}
|
||||||
|
|
||||||
//#endregion Public methods
|
//#endregion Public methods
|
||||||
|
@ -94,8 +118,8 @@ function isLinkNode(node: cheerio.Element): boolean {
|
||||||
|
|
||||||
// The node is a valid DOM element
|
// The node is a valid DOM element
|
||||||
if (node.type === "tag") {
|
if (node.type === "tag") {
|
||||||
const el = node as cheerio.TagElement;
|
const e = node as cheerio.TagElement;
|
||||||
valid = el.name === "a" || el.name === "img";
|
valid = e.name === "a" || e.name === "img";
|
||||||
}
|
}
|
||||||
|
|
||||||
return valid;
|
return valid;
|
||||||
|
@ -108,6 +132,32 @@ function isNoScriptNode(node: cheerio.Element): boolean {
|
||||||
return node.type === "tag" && node.name === "noscript";
|
return node.type === "tag" && node.name === "noscript";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the node is a list element, i.e. `<li>` or `<ul>` tag.
|
||||||
|
*/
|
||||||
|
function isListNode(node: cheerio.Element): boolean {
|
||||||
|
return node.type === "tag" && (node.name === "ul" || node.name === "li");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Identify the type of the node passed as a parameter.
|
||||||
|
*/
|
||||||
|
function nodeType($: cheerio.Root, node: cheerio.Element): TNodeType {
|
||||||
|
// Function map
|
||||||
|
const functionMap = {
|
||||||
|
Text: (node: cheerio.Element) => isTextNode(node) && !isFormattingNode(node),
|
||||||
|
Formatted: (node: cheerio.Element) => isFormattingNode(node),
|
||||||
|
Spoiler: (node: cheerio.Element) => isSpoilerNode($(node)),
|
||||||
|
Link: (node: cheerio.Element) => isLinkNode(node),
|
||||||
|
List: (node: cheerio.Element) => isListNode(node),
|
||||||
|
Noscript: (node: cheerio.Element) => isNoScriptNode(node)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Parse and return the type of the node
|
||||||
|
const result = Object.keys(functionMap).find((e) => functionMap[e](node));
|
||||||
|
return result ? (result as TNodeType) : "Unknown";
|
||||||
|
}
|
||||||
|
|
||||||
//#endregion Node Type
|
//#endregion Node Type
|
||||||
|
|
||||||
//#region Parse Cheerio node
|
//#region Parse Cheerio node
|
||||||
|
@ -139,10 +189,10 @@ function parseCheerioSpoilerNode($: cheerio.Root, node: cheerio.Cheerio): IPostE
|
||||||
.find(POST.SPOILER_CONTENT)
|
.find(POST.SPOILER_CONTENT)
|
||||||
.contents()
|
.contents()
|
||||||
.toArray()
|
.toArray()
|
||||||
.map((el) => parseCheerioNode($, el));
|
.map((e) => parseCheerioNode($, e));
|
||||||
|
|
||||||
// Clean text
|
// Clean text (Spoiler has no text) @todo
|
||||||
spoiler.text = spoiler.text.replace(/\s\s+/g, " ").trim();
|
// spoiler.text = spoiler.text.replace(/\s\s+/g, " ").trim();
|
||||||
return spoiler;
|
return spoiler;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,6 +235,31 @@ function parseCheerioTextNode(node: cheerio.Cheerio): IPostElement {
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the text of the node only, excluding child nodes.
|
||||||
|
* Also includes formatted text elements (i.e. `<b>`).
|
||||||
|
*/
|
||||||
|
function getCheerioNonChildrenText(node: cheerio.Cheerio): string {
|
||||||
|
// Local variable
|
||||||
|
let text = "";
|
||||||
|
|
||||||
|
// If the node has no children, return the node's text
|
||||||
|
if (node.contents().length === 1) {
|
||||||
|
// @todo Remove IF after cheerio RC6
|
||||||
|
text = node.text();
|
||||||
|
} else {
|
||||||
|
// Find all the text nodes in the node
|
||||||
|
text = node
|
||||||
|
.first()
|
||||||
|
.contents() // @todo Change to children() after cheerio RC6
|
||||||
|
.filter((idx, e) => isTextNode(e))
|
||||||
|
.text();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean and return the text
|
||||||
|
return text.replace(/\s\s+/g, " ").trim();
|
||||||
|
}
|
||||||
|
|
||||||
//#endregion Parse Cheerio node
|
//#endregion Parse Cheerio node
|
||||||
|
|
||||||
//#region IPostElement utility
|
//#region IPostElement utility
|
||||||
|
@ -219,65 +294,30 @@ function createGenericElement(): IPostElement {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if the element contains the overview of a thread (post #1).
|
* Clean the element `name` and `text` removing initial and final special characters.
|
||||||
*/
|
*/
|
||||||
function elementIsOverview(element: IPostElement): boolean {
|
function cleanElement(element: IPostElement): IPostElement {
|
||||||
// Search the text element that start with "overview"
|
|
||||||
const result = element.content
|
|
||||||
.filter((e) => e.type === "Text")
|
|
||||||
.find((e) => e.text.toUpperCase().startsWith("OVERVIEW"));
|
|
||||||
return result !== undefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* If the element contains the overview of a thread, parse it.
|
|
||||||
*/
|
|
||||||
function getOverviewFromElement(element: IPostElement): string {
|
|
||||||
// Local variables
|
// Local variables
|
||||||
const alphanumericRegex = new RegExp("[a-zA-Z0-9]+");
|
const shallow = Object.assign({}, element);
|
||||||
|
const specialCharSet = /[-!$%^&*()_+|~=`{}[\]:";'<>?,./]/;
|
||||||
|
const startsWithSpecialCharsRegex = new RegExp("^" + specialCharSet.source);
|
||||||
|
const endsWithSpecialCharsRegex = new RegExp(specialCharSet.source + "$");
|
||||||
|
|
||||||
// Get all the text values of the overview
|
shallow.name = shallow.name
|
||||||
const textes = element.content
|
.replace(startsWithSpecialCharsRegex, "")
|
||||||
.filter((e) => e.type === "Text")
|
.replace(endsWithSpecialCharsRegex, "")
|
||||||
.filter((e) => {
|
.trim();
|
||||||
const cleanValue = e.text.toUpperCase().replace("OVERVIEW", "").trim();
|
|
||||||
const isAlphanumeric = alphanumericRegex.test(cleanValue);
|
|
||||||
|
|
||||||
return cleanValue !== "" && isAlphanumeric;
|
shallow.text = shallow.text
|
||||||
})
|
.replace(startsWithSpecialCharsRegex, "")
|
||||||
.map((e) => e.text);
|
.replace(endsWithSpecialCharsRegex, "")
|
||||||
|
.trim();
|
||||||
|
|
||||||
// Joins the textes
|
return shallow;
|
||||||
return textes.join(" ");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//#endregion IPostElement utility
|
//#endregion IPostElement utility
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the text of the node only, excluding child nodes.
|
|
||||||
* Also includes formatted text elements (i.e. `<b>`).
|
|
||||||
*/
|
|
||||||
function getCheerioNonChildrenText(node: cheerio.Cheerio): string {
|
|
||||||
// Local variable
|
|
||||||
let text = "";
|
|
||||||
|
|
||||||
// If the node has no children, return the node's text
|
|
||||||
if (node.contents().length === 1) {
|
|
||||||
// @todo Remove IF after cheerio RC6
|
|
||||||
text = node.text();
|
|
||||||
} else {
|
|
||||||
// Find all the text nodes in the node
|
|
||||||
text = node
|
|
||||||
.first()
|
|
||||||
.contents() // @todo Change to children() after cheerio RC6
|
|
||||||
.filter((idx, el) => isTextNode(el))
|
|
||||||
.text();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean and return the text
|
|
||||||
return text.replace(/\s\s+/g, " ").trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Collapse an `IPostElement` element with a single subnode
|
* Collapse an `IPostElement` element with a single subnode
|
||||||
* in the `Content` field in case it has no information.
|
* in the `Content` field in case it has no information.
|
||||||
|
@ -286,7 +326,7 @@ function reducePostElement(element: IPostElement): IPostElement {
|
||||||
// Local variables
|
// Local variables
|
||||||
const shallowCopy = Object.assign({}, element);
|
const shallowCopy = Object.assign({}, element);
|
||||||
|
|
||||||
// If the node has only one child, return it
|
// If the node has only one child, reduce and return it
|
||||||
if (isPostElementUnknown(shallowCopy) && shallowCopy.content.length === 1) {
|
if (isPostElementUnknown(shallowCopy) && shallowCopy.content.length === 1) {
|
||||||
return reducePostElement(shallowCopy.content[0]);
|
return reducePostElement(shallowCopy.content[0]);
|
||||||
}
|
}
|
||||||
|
@ -304,11 +344,15 @@ function removeEmptyContentFromElement(element: IPostElement, recursive = true):
|
||||||
// Create a copy of the element
|
// Create a copy of the element
|
||||||
const copy = Object.assign({}, element);
|
const copy = Object.assign({}, element);
|
||||||
|
|
||||||
// Find the non-empty nodes
|
|
||||||
const validNodes = copy.content.filter((e) => !isPostElementEmpty(e));
|
|
||||||
|
|
||||||
// Reduce nested contents if recursive
|
// Reduce nested contents if recursive
|
||||||
if (recursive) validNodes.forEach((e) => removeEmptyContentFromElement(e));
|
const recursiveResult = recursive
|
||||||
|
? element.content.map((e) => removeEmptyContentFromElement(e))
|
||||||
|
: copy.content;
|
||||||
|
|
||||||
|
// Find the non-empty nodes
|
||||||
|
const validNodes = recursiveResult
|
||||||
|
.filter((e) => !isPostElementEmpty(e)) // Remove the empty nodes
|
||||||
|
.filter((e) => !isPostElementEmpty(cleanElement(e))); // Remove the useless nodes
|
||||||
|
|
||||||
// Assign the nodes
|
// Assign the nodes
|
||||||
copy.content = validNodes;
|
copy.content = validNodes;
|
||||||
|
@ -321,27 +365,36 @@ function removeEmptyContentFromElement(element: IPostElement, recursive = true):
|
||||||
*/
|
*/
|
||||||
function parseCheerioNode($: cheerio.Root, node: cheerio.Element): IPostElement {
|
function parseCheerioNode($: cheerio.Root, node: cheerio.Element): IPostElement {
|
||||||
// Local variables
|
// Local variables
|
||||||
let post: IPostElement = createGenericElement();
|
|
||||||
const cheerioNode = $(node);
|
const cheerioNode = $(node);
|
||||||
|
|
||||||
// Parse the node
|
// Function mapping
|
||||||
if (!isNoScriptNode(node)) {
|
const functionMap = {
|
||||||
if (isTextNode(node) && !isFormattingNode(node)) post = parseCheerioTextNode(cheerioNode);
|
Text: (node: cheerio.Cheerio) => parseCheerioTextNode(node),
|
||||||
else if (isSpoilerNode(cheerioNode)) post = parseCheerioSpoilerNode($, cheerioNode);
|
Spoiler: (node: cheerio.Cheerio) => parseCheerioSpoilerNode($, node),
|
||||||
else if (isLinkNode(node)) post = parseCheerioLinkNode(cheerioNode);
|
Link: (node: cheerio.Cheerio) => parseCheerioLinkNode(node)
|
||||||
|
};
|
||||||
|
|
||||||
// Avoid duplication of link name
|
// Get the type of node
|
||||||
if (!isLinkNode(node)) {
|
const type = nodeType($, node);
|
||||||
// Parse the node's childrens
|
|
||||||
|
// Get the post based on the type of node
|
||||||
|
const post = Object.keys(functionMap).includes(type)
|
||||||
|
? functionMap[type]($(node))
|
||||||
|
: createGenericElement();
|
||||||
|
|
||||||
|
// Parse the children only if the node is a <b>/<i> element, a list
|
||||||
|
// or an unknown element. For links it is unnecessary, while for the
|
||||||
|
// spoilers it is already done in parseCheerioSpoilerNode
|
||||||
|
const includeTypes: TNodeType[] = ["Formatted", "List", "Unknown"];
|
||||||
|
if (includeTypes.includes(type)) {
|
||||||
const childPosts = cheerioNode
|
const childPosts = cheerioNode
|
||||||
.contents() // @todo Change to children() after cheerio RC6
|
.contents() // @todo Change to children() after cheerio RC6
|
||||||
.toArray()
|
.toArray()
|
||||||
.filter((el) => el) // Ignore undefined elements
|
.filter((e) => e) // Ignore undefined elements
|
||||||
.map((el) => parseCheerioNode($, el))
|
.map((e) => parseCheerioNode($, e))
|
||||||
.filter((el) => !isPostElementEmpty(el));
|
.filter((e) => !isPostElementEmpty(e));
|
||||||
post.content.push(...childPosts);
|
post.content.push(...childPosts);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return post;
|
return post;
|
||||||
}
|
}
|
||||||
|
@ -350,50 +403,118 @@ function parseCheerioNode($: cheerio.Root, node: cheerio.Element): IPostElement
|
||||||
* It simplifies the `IPostElement` elements by associating
|
* It simplifies the `IPostElement` elements by associating
|
||||||
* the corresponding value to each characterizing element (i.e. author).
|
* the corresponding value to each characterizing element (i.e. author).
|
||||||
*/
|
*/
|
||||||
function associateNameToElements(elements: IPostElement[]): IPostElement[] {
|
function pairUpElements(elements: IPostElement[]): IPostElement[] {
|
||||||
// Local variables
|
// Local variables
|
||||||
const pairs: IPostElement[] = [];
|
const shallow = [...elements];
|
||||||
const specialCharsRegex = /^[-!$%^&*()_+|~=`{}[\]:";'<>?,./]/;
|
|
||||||
const specialRegex = new RegExp(specialCharsRegex);
|
|
||||||
|
|
||||||
for (let i = 0; i < elements.length; i++) {
|
// Parse all the generic elements that
|
||||||
// If the text starts with a special char, clean it
|
// act as "container" for other information
|
||||||
const startWithSpecial = specialRegex.test(elements[i].text);
|
shallow
|
||||||
|
.filter((e) => e.type === "Generic")
|
||||||
|
.map((e) => ({
|
||||||
|
element: e,
|
||||||
|
pairs: pairUpElements(e.content)
|
||||||
|
}))
|
||||||
|
.forEach((e) => {
|
||||||
|
// Find the index of the elements
|
||||||
|
const index = shallow.indexOf(e.element);
|
||||||
|
|
||||||
// Get the latest IPostElement in "pairs"
|
// Remove that element
|
||||||
const lastIndex = pairs.length - 1;
|
shallow.splice(index, 1);
|
||||||
const lastPair = pairs[lastIndex];
|
|
||||||
|
|
||||||
// If this statement is valid, we have a "data"
|
// Add the pairs at the index of the deleted element
|
||||||
if (elements[i].type === "Text" && startWithSpecial && pairs.length > 0) {
|
e.pairs.forEach((e, i) => shallow.splice(index + i, 0, e));
|
||||||
// We merge this element with the last element appended to 'pairs'
|
});
|
||||||
const cleanText = elements[i].text.replace(specialCharsRegex, "").trim();
|
|
||||||
lastPair.text = lastPair.text || cleanText;
|
|
||||||
lastPair.content.push(...elements[i].content);
|
|
||||||
}
|
|
||||||
// This is a special case
|
|
||||||
else if (elementIsOverview(elements[i])) {
|
|
||||||
// We add the overview to the pairs as a text element
|
|
||||||
elements[i].type = "Text";
|
|
||||||
elements[i].name = "Overview";
|
|
||||||
elements[i].text = getOverviewFromElement(elements[i]);
|
|
||||||
pairs.push(elements[i]);
|
|
||||||
}
|
|
||||||
// We have an element referred to the previous "title"
|
|
||||||
else if (elements[i].type != "Text" && pairs.length > 0) {
|
|
||||||
// We append this element to the content of the last title
|
|
||||||
lastPair.content.push(elements[i]);
|
|
||||||
}
|
|
||||||
// ... else we have a "title" (we need to swap the text to the name because it is a title)
|
|
||||||
else {
|
|
||||||
const swap: IPostElement = Object.assign({}, elements[i]);
|
|
||||||
swap.name = elements[i].text;
|
|
||||||
swap.text = "";
|
|
||||||
pairs.push(swap);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return pairs;
|
// Then we find all the indexes of the elements that are "titles".
|
||||||
|
const indexes = shallow
|
||||||
|
.filter((e, i) => isValidTitleElement(e, i, shallow))
|
||||||
|
.map((e) => shallow.indexOf(e));
|
||||||
|
|
||||||
|
// Now we find all the elements between indexes and
|
||||||
|
// associate them with the previous "title" element
|
||||||
|
return indexes.map((i, j) => parseGroupData(i, j, indexes, shallow));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verify if the `element` is a valid title.
|
||||||
|
* @param element Element to check
|
||||||
|
* @param index Index of the element in `array`
|
||||||
|
* @param array Array of elements to check
|
||||||
|
*/
|
||||||
|
function isValidTitleElement(element: IPostElement, index: number, array: IPostElement[]): boolean {
|
||||||
|
// Check if this element is a "title" checking also the next element
|
||||||
|
const isPostfixDoublePoints = element.text.endsWith(":") && element.text !== ":";
|
||||||
|
const nextElementIsValue = array[index + 1]?.text.startsWith(":");
|
||||||
|
const elementIsTextTitle =
|
||||||
|
element.type === "Text" && (isPostfixDoublePoints || nextElementIsValue);
|
||||||
|
|
||||||
|
// Special values that must be set as "title"
|
||||||
|
const specialValues = ["DOWNLOAD", "CHANGELOG", "CHANGE-LOG", "GENRE"];
|
||||||
|
const specialTypes = ["Image"];
|
||||||
|
|
||||||
|
// Used to ignore already merged elements with name (ignore spoilers)
|
||||||
|
// because they have as name the content of the spoiler button
|
||||||
|
const hasName = element.name !== "" && element.type !== "Spoiler";
|
||||||
|
|
||||||
|
return (
|
||||||
|
elementIsTextTitle ||
|
||||||
|
specialTypes.includes(element.type) ||
|
||||||
|
specialValues.includes(element.text.toUpperCase()) ||
|
||||||
|
hasName
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Associate the relative values to a title.
|
||||||
|
* @param start Title index in the `elements` array
|
||||||
|
* @param index `start` index in `indexes`
|
||||||
|
* @param indexes List of titles indices in the `elements` array
|
||||||
|
* @param elements Array of elements to group
|
||||||
|
*/
|
||||||
|
function parseGroupData(
|
||||||
|
start: number,
|
||||||
|
index: number,
|
||||||
|
indexes: number[],
|
||||||
|
elements: IPostElement[]
|
||||||
|
): IPostElement {
|
||||||
|
// Local variables
|
||||||
|
const endsWithSpecialCharsRegex = /[-:]$/;
|
||||||
|
const startsWithDoublePointsRegex = /^[:]/;
|
||||||
|
|
||||||
|
// Find all the elements (title + data) of the same data group
|
||||||
|
const nextIndex = indexes[index + 1] ?? elements.length;
|
||||||
|
const group = elements.slice(start, nextIndex);
|
||||||
|
|
||||||
|
// Extract the title
|
||||||
|
const title = group.shift();
|
||||||
|
|
||||||
|
// If the title is already named (because it was
|
||||||
|
// previously elaborated) return it without further processing
|
||||||
|
if (title.name !== "" && title.type !== "Spoiler") return title;
|
||||||
|
|
||||||
|
// Assign name and text of the title
|
||||||
|
title.name = title.text.replace(endsWithSpecialCharsRegex, "").trim();
|
||||||
|
title.text = group
|
||||||
|
.filter((e) => e.type === "Text")
|
||||||
|
.map((e) =>
|
||||||
|
e.text
|
||||||
|
.replace(startsWithDoublePointsRegex, "") // Remove the starting ":" from the element's text
|
||||||
|
.replace(endsWithSpecialCharsRegex, "") // Remove any special chars at the end
|
||||||
|
.trim()
|
||||||
|
)
|
||||||
|
.join(" ") // Join with space
|
||||||
|
.trim();
|
||||||
|
|
||||||
|
// Append all the content of the elements.
|
||||||
|
group.forEach(
|
||||||
|
(e) =>
|
||||||
|
e.type === "Spoiler"
|
||||||
|
? title.content.push(...e.content) // Add all the content fo the spoiler
|
||||||
|
: title.content.push(e) // Add the element itself
|
||||||
|
);
|
||||||
|
|
||||||
|
return title;
|
||||||
}
|
}
|
||||||
|
|
||||||
//#endregion Private methods
|
//#endregion Private methods
|
||||||
|
|
Loading…
Reference in New Issue