Refactoring code
parent
b74a212c80
commit
b7d27483cc
|
@ -235,6 +235,31 @@ function parseCheerioTextNode(node: cheerio.Cheerio): IPostElement {
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts the text that belongs to `node` itself, leaving out the text
 * of nested elements.
 *
 * When the node holds exactly one content item, the node's whole `text()`
 * is used, so a lone formatting element (i.e. `<b>`) is included.
 * Otherwise only the content items of the first matched node that
 * `isTextNode` accepts contribute to the result.
 *
 * @param node Cheerio selection to read the text from
 * @returns The text with every run of two or more whitespace characters
 * collapsed into a single space and with the ends trimmed
 */
function getCheerioNonChildrenText(node: cheerio.Cheerio): string {
  // @todo Remove this shortcut after cheerio RC6
  const hasSingleContent = node.contents().length === 1;

  const rawText = hasSingleContent
    ? node.text()
    : node
        .first()
        .contents() // @todo Change to children() after cheerio RC6
        .filter((_idx, child) => isTextNode(child))
        .text();

  // Collapse whitespace runs and strip the leading/trailing blanks
  return rawText.replace(/\s\s+/g, " ").trim();
}
|
||||||
|
|
||||||
//#endregion Parse Cheerio node
|
//#endregion Parse Cheerio node
|
||||||
|
|
||||||
//#region IPostElement utility
|
//#region IPostElement utility
|
||||||
|
@ -268,66 +293,8 @@ function createGenericElement(): IPostElement {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Check if the element contains the overview of a thread (post #1).
 *
 * Only the direct entries of `element.content` are inspected.
 *
 * @param element Element whose content is searched
 * @returns `true` when at least one "Text" entry has a text starting
 * with "overview" (case-insensitive), `false` otherwise
 */
function elementIsOverview(element: IPostElement): boolean {
  // `some` stops at the first match and needs no intermediate array
  return element.content.some(
    (e) => e.type === "Text" && e.text.toUpperCase().startsWith("OVERVIEW")
  );
}
|
|
||||||
|
|
||||||
/**
 * If the element contains the overview of a thread, parse it.
 *
 * Keeps the direct "Text" entries of `element.content` whose upper-cased
 * text, once the first "OVERVIEW" occurrence is removed, still contains
 * at least one ASCII letter or digit. The original (unmodified) texts of
 * the kept entries are joined with a single space.
 *
 * @param element Element whose content holds the overview
 * @returns The joined texts, or an empty string when nothing qualifies
 */
function getOverviewFromElement(element: IPostElement): string {
  // One alphanumeric character is enough to consider a text meaningful;
  // this also rules out empty and whitespace-only values
  const alphanumericRegex = /[a-zA-Z0-9]/;

  // Get all the text values of the overview
  const texts = element.content
    .filter((e) => {
      if (e.type !== "Text") return false;

      // Drop the "Overview" label so a bare title is not kept as content
      const cleanValue = e.text.toUpperCase().replace("OVERVIEW", "");
      return alphanumericRegex.test(cleanValue);
    })
    .map((e) => e.text);

  // Join the texts
  return texts.join(" ");
}
|
|
||||||
|
|
||||||
//#endregion IPostElement utility
|
//#endregion IPostElement utility
|
||||||
|
|
||||||
/**
 * Extracts the text that belongs to `node` itself, leaving out the text
 * of nested elements.
 *
 * When the node holds exactly one content item, the node's whole `text()`
 * is used, so a lone formatting element (i.e. `<b>`) is included.
 * Otherwise only the content items of the first matched node that
 * `isTextNode` accepts contribute to the result.
 *
 * @param node Cheerio selection to read the text from
 * @returns The text with every run of two or more whitespace characters
 * collapsed into a single space and with the ends trimmed
 */
function getCheerioNonChildrenText(node: cheerio.Cheerio): string {
  // @todo Remove this shortcut after cheerio RC6
  const hasSingleContent = node.contents().length === 1;

  const rawText = hasSingleContent
    ? node.text()
    : node
        .first()
        .contents() // @todo Change to children() after cheerio RC6
        .filter((_idx, child) => isTextNode(child))
        .text();

  // Collapse whitespace runs and strip the leading/trailing blanks
  return rawText.replace(/\s\s+/g, " ").trim();
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Collapse an `IPostElement` element with a single subnode
|
* Collapse an `IPostElement` element with a single subnode
|
||||||
* in the `Content` field in case it has no information.
|
* in the `Content` field in case it has no information.
|
||||||
|
@ -434,14 +401,9 @@ function pairUpElements(elements: IPostElement[]): IPostElement[] {
|
||||||
e.pairs.forEach((e, i) => shallow.splice(index + i, 0, e));
|
e.pairs.forEach((e, i) => shallow.splice(index + i, 0, e));
|
||||||
});
|
});
|
||||||
|
|
||||||
// Ignore the "Generic" elements that we have already parsed
|
// Then we find all the IDs of the elements that are "titles".
|
||||||
//const validElements = shallow.filter((e) => e.type !== "Generic");
|
|
||||||
|
|
||||||
// Then we find all the IDs of "Text" elements where the
|
|
||||||
// text doesn't start with a colon. This means
|
|
||||||
// that we find all the IDs of "title" elements.
|
|
||||||
const indexes = shallow
|
const indexes = shallow
|
||||||
.filter((e, i) => filterValidElements(e, i, shallow))
|
.filter((e, i) => isValidTitleElement(e, i, shallow))
|
||||||
.map((e) => shallow.indexOf(e));
|
.map((e) => shallow.indexOf(e));
|
||||||
|
|
||||||
// Now we find all the elements between indexes and
|
// Now we find all the elements between indexes and
|
||||||
|
@ -449,7 +411,13 @@ function pairUpElements(elements: IPostElement[]): IPostElement[] {
|
||||||
return indexes.map((i, j) => parseGroupData(i, j, indexes, shallow));
|
return indexes.map((i, j) => parseGroupData(i, j, indexes, shallow));
|
||||||
}
|
}
|
||||||
|
|
||||||
function filterValidElements(element: IPostElement, index: number, array: IPostElement[]): boolean {
|
/**
|
||||||
|
* Verify if the `element` is a valid title.
|
||||||
|
* @param element Element to check
|
||||||
|
* @param index Index of the element in `array`
|
||||||
|
* @param array Array of elements to check
|
||||||
|
*/
|
||||||
|
function isValidTitleElement(element: IPostElement, index: number, array: IPostElement[]): boolean {
|
||||||
// Check if this element is a "title" checking also the next element
|
// Check if this element is a "title" checking also the next element
|
||||||
const isPostfixDoublePoints = element.text.endsWith(":") && element.text !== ":";
|
const isPostfixDoublePoints = element.text.endsWith(":") && element.text !== ":";
|
||||||
const nextElementIsValue = array[index + 1]?.text.startsWith(":");
|
const nextElementIsValue = array[index + 1]?.text.startsWith(":");
|
||||||
|
|
Loading…
Reference in New Issue