From 14a290468f9a7ebf9e97e52416a3c9dae1ae3aa4 Mon Sep 17 00:00:00 2001 From: MillenniumEarl Date: Sun, 21 Mar 2021 16:55:15 +0100 Subject: [PATCH] Fix removeEmptyContent recursion and parse node children only for formatting nodes --- src/scripts/scrape-data/post-parse.ts | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/scripts/scrape-data/post-parse.ts b/src/scripts/scrape-data/post-parse.ts index 716c19e..cc5e24a 100644 --- a/src/scripts/scrape-data/post-parse.ts +++ b/src/scripts/scrape-data/post-parse.ts @@ -325,11 +325,13 @@ function removeEmptyContentFromElement(element: IPostElement, recursive = true): // Create a copy of the element const copy = Object.assign({}, element); - // Find the non-empty nodes - const validNodes = copy.content.filter((e) => !isPostElementEmpty(e)); - // Reduce nested contents if recursive - if (recursive) validNodes.forEach((e) => removeEmptyContentFromElement(e)); + const recursiveResult = recursive + ? element.content.map((e) => removeEmptyContentFromElement(e)) + : copy.content; + + // Find the non-empty nodes + const validNodes = recursiveResult.filter((e) => !isPostElementEmpty(e)); // Assign the nodes copy.content = validNodes; @@ -351,8 +353,10 @@ function parseCheerioNode($: cheerio.Root, node: cheerio.Element): IPostElement else if (isSpoilerNode(cheerioNode)) post = parseCheerioSpoilerNode($, cheerioNode); else if (isLinkNode(node)) post = parseCheerioLinkNode(cheerioNode); - // Avoid duplication of link name - if (!isLinkNode(node)) { + // Check for children only if the node is a formatting element. 
+ // For links it is unnecessary, while for spoilers it is + // already done in parseCheerioSpoilerNode + if (isFormattingNode(node)) { // Parse the node's childrens const childPosts = cheerioNode .contents() // @todo Change to children() after cheerio RC6 @@ -479,7 +483,8 @@ function parseGroupData( .replace(endsWithSpecialCharsRegex, "") // Remove any special chars at the end .trim() ) .join(" ") // Join with space + .trim(); // Append all the content of non-text elements. group