Reduce cyclomatic complexity in parseCheerioNode

pull/83/head
MillenniumEarl 2021-03-21 18:44:34 +01:00
parent dcc5ed973f
commit 751036f0d3
1 changed files with 51 additions and 19 deletions

View File

@ -8,6 +8,9 @@
// Import from files // Import from files
import { POST } from "../constants/css-selector"; import { POST } from "../constants/css-selector";
// Types
/**
 * Categories that `nodeType` can assign to a node.
 * "Unknown" is the fallback used when no specific check matches.
 */
type NodeTypeT =
  | "Text"
  | "Formatted"
  | "Spoiler"
  | "Link"
  | "List"
  | "Noscript"
  | "Unknown";
//#region Interfaces //#region Interfaces
/** /**
@ -136,6 +139,25 @@ function isListNode(node: cheerio.Element): boolean {
return node.type === "tag" && (node.name === "ul" || node.name === "li"); return node.type === "tag" && (node.name === "ul" || node.name === "li");
} }
/**
 * Identify the type of the node passed by parameter.
 *
 * The checks run in a fixed order (Text, Formatted, Spoiler, Link, List,
 * Noscript) and the first one that matches wins. A node that matches none
 * of them is reported as "Unknown".
 *
 * @param $ Cheerio root, used to wrap `node` for the spoiler check
 * @param node Element to classify
 * @returns The name of the first matching check, or "Unknown"
 */
function nodeType($: cheerio.Root, node: cheerio.Element): NodeTypeT {
  // Ordered list of [type, check] pairs. Keeping each type name next to its
  // check lets the compiler validate every name against NodeTypeT, so no
  // string-keyed lookup and no cast on the result are needed.
  const checks: ReadonlyArray<readonly [NodeTypeT, () => boolean]> = [
    ["Text", () => isTextNode(node) && !isFormattingNode(node)],
    ["Formatted", () => isFormattingNode(node)],
    ["Spoiler", () => isSpoilerNode($(node))],
    ["Link", () => isLinkNode(node)],
    ["List", () => isListNode(node)],
    ["Noscript", () => isNoScriptNode(node)]
  ];

  // Checks are evaluated lazily: evaluation stops at the first match
  const match = checks.find(([, check]) => check());
  return match ? match[0] : "Unknown";
}
//#endregion Node Type //#endregion Node Type
//#region Parse Cheerio node //#region Parse Cheerio node
@ -351,28 +373,34 @@ function removeEmptyContentFromElement(element: IPostElement, recursive = true):
*/ */
function parseCheerioNode($: cheerio.Root, node: cheerio.Element): IPostElement { function parseCheerioNode($: cheerio.Root, node: cheerio.Element): IPostElement {
// Local variables // Local variables
let post: IPostElement = createGenericElement();
const cheerioNode = $(node); const cheerioNode = $(node);
// Parse the node // Function mapping
if (!isNoScriptNode(node)) { const functionMap = {
if (isTextNode(node) && !isFormattingNode(node)) post = parseCheerioTextNode(cheerioNode); Text: (node: cheerio.Cheerio) => parseCheerioTextNode(node),
else if (isSpoilerNode(cheerioNode)) post = parseCheerioSpoilerNode($, cheerioNode); Spoiler: (node: cheerio.Cheerio) => parseCheerioSpoilerNode($, node),
else if (isLinkNode(node)) post = parseCheerioLinkNode(cheerioNode); Link: (node: cheerio.Cheerio) => parseCheerioLinkNode(node)
};
// Check for childrens only if the node is a <b>/<i> element // Get the type of node
// or a list element. For the link in unnecessary while for const type = nodeType($, node);
// the spoilers is already done in parseCheerioSpoilerNode
if (isFormattingNode(node) || isListNode(node)) { // Get the post based on the type of node
// Parse the node's childrens const post = Object.keys(functionMap).includes(type)
const childPosts = cheerioNode ? functionMap[type]($(node))
.contents() // @todo Change to children() after cheerio RC6 : createGenericElement();
.toArray()
.filter((el) => el) // Ignore undefined elements // Parse the childrens only if the node is a <b>/<i> element
.map((el) => parseCheerioNode($, el)) // or a list element. For the link in unnecessary while for
.filter((el) => !isPostElementEmpty(el)); // the spoilers is already done in parseCheerioSpoilerNode
post.content.push(...childPosts); if (type === "Formatted" || type === "List") {
} const childPosts = cheerioNode
.contents() // @todo Change to children() after cheerio RC6
.toArray()
.filter((e) => e) // Ignore undefined elements
.map((e) => parseCheerioNode($, e))
.filter((e) => !isPostElementEmpty(e));
post.content.push(...childPosts);
} }
return post; return post;
@ -428,6 +456,10 @@ function associateNameToElements(elements: IPostElement[]): IPostElement[] {
return pairUp(elements); return pairUp(elements);
} }
/**
 * Simplifies the `IPostElement` elements by associating each
 * characterizing element (e.g. the author) with its corresponding value.
 */
function pairUp(elements: IPostElement[]): IPostElement[] { function pairUp(elements: IPostElement[]): IPostElement[] {
// First ignore the "Generic" type elements, because // First ignore the "Generic" type elements, because
// they usually are containers for other data, like // they usually are containers for other data, like