// Copyright (c) 2021 MillenniumEarl
//
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT

"use strict";

// Import from files
import { POST } from "../constants/css-selector";

// Types
type NodeTypeT = "Text" | "Formatted" | "Spoiler" | "Link" | "List" | "Noscript" | "Unknown";

//#region Interfaces

/**
 * Represents an element contained in the post.
 */
export interface IPostElement {
  /**
   * Type of element.
   */
  type: "Generic" | "Text" | "Link" | "Image" | "Spoiler";
  /**
   * Name associated with the element.
   */
  name: string;
  /**
   * Text of the content of the element excluding any children.
   */
  text: string;
  /**
   * Children elements contained in this element.
   */
  content: IPostElement[];
}

/**
 * Represents a link-like element (a link or an image) in the post.
 */
export interface ILink extends IPostElement {
  type: "Image" | "Link";
  /**
   * Link to the resource.
   */
  href: string;
}

//#endregion Interfaces

//#region Public methods

/**
 * Given a post of a thread page it extracts the information contained in the body.
 *
 * @param $ Cheerio root, forwarded to the per-node parser.
 * @param post Cheerio selection whose direct contents are parsed.
 * @returns The elements produced by `associateNameToElements` from the
 * reduced and cleaned content of the post.
 */
export function parseF95ThreadPost($: cheerio.Root, post: cheerio.Cheerio): IPostElement[] {
  // The data is divided between "tag" and "text" elements.
  // Simple data is composed of a "tag" element followed
  // by a "text" element, while more complex data (contained
  // in spoilers) is composed of a "tag" element, followed
  // by a text containing only ":" and then by an additional
  // "tag" element having as the first term "Spoiler"

  // First fetch all the direct child nodes of the post and parse each of them
  const elements = post
    .contents()
    .toArray()
    .map((e) => parseCheerioNode($, e));

  // Wrap the parsed nodes in a single generic "supernode" so that the
  // following steps can work on one root element
  let supernode = createGenericElement();
  supernode.content = elements;

  // Reduce the nodes
  supernode = reducePostElement(supernode);

  // Remove the empty nodes
  supernode = removeEmptyContentFromElement(supernode);

  // Finally parse the elements to create the pairs of title/data
  return associateNameToElements(supernode.content);
}

//#endregion Public methods

//#region Private methods

//#region Node type

/**
 * Check if the node passed as a parameter is a formatting one,
 * i.e. a `<b>` or `<i>` tag.
 */
function isFormattingNode(node: cheerio.Element): boolean {
  const formattedTags = ["b", "i"];
  return node.type === "tag" && formattedTags.includes(node.name);
}

/**
 * Check if the node passed as a parameter is of text type.
 */
function isTextNode(node: cheerio.Element): boolean {
  return node.type === "text";
}

/**
 * Check if the node is a spoiler, i.e. its `class` attribute
 * contains the `bbCodeSpoiler` class.
 *
 * The class list is matched token by token, so an element that carries
 * additional classes next to `bbCodeSpoiler` is still recognized, while
 * classes that merely start with the same text (e.g. `bbCodeSpoiler-button`)
 * are not.
 */
function isSpoilerNode(node: cheerio.Cheerio): boolean {
  const classAttr = node.attr("class");

  // No class attribute at all: it cannot be a spoiler
  if (classAttr === undefined) return false;

  return classAttr.split(/\s+/).includes("bbCodeSpoiler");
}

/**
 * Check if the node is a link or an image, i.e. an `a` or `img` tag.
 */
function isLinkNode(node: cheerio.Element): boolean {
  // Only tag nodes have a name that can be compared
  return node.type === "tag" && (node.name === "a" || node.name === "img");
}

/**
 * Check if the node is a `noscript` tag.
 */
function isNoScriptNode(node: cheerio.Element): boolean {
  return node.type === "tag" && node.name === "noscript";
}

// NOTE(review): the doc-comment fragment below continues on the following line; it is left as found.
// /** * Check if the node is a list element, i.e. `
  • ` or `