Fix the HTML cleaning code, which was not actually cleaning anything.
This commit is contained in:
parent
8e517e5de9
commit
00150938dd
1 changed file with 7 additions and 6 deletions
|
@ -29,13 +29,10 @@ namespace duck {
|
|||
|
||||
std::string cleanHTML (std::string&& html) {
|
||||
dropScriptTags(html);
|
||||
std::unique_ptr<char[]> html_copy(new char[html.size()]);
|
||||
std::copy(html.begin(), html.end(), html_copy.get());
|
||||
|
||||
// Initialize a Tidy document
|
||||
TidyDoc tidyDoc = tidyCreate();
|
||||
TidyBuffer tidyOutputBuffer;
|
||||
tidyBufInit(&tidyOutputBuffer);
|
||||
TidyBuffer tidyOutputBuffer = {nullptr, nullptr, 0, 0, 0};
|
||||
|
||||
// Configure Tidy
|
||||
// The flags tell Tidy to output XML and disable showing warnings
|
||||
|
@ -48,14 +45,18 @@ namespace duck {
|
|||
|
||||
// Parse input
|
||||
if (configSuccess) {
|
||||
tidyBufAppend(&tidyOutputBuffer, html_copy.get(), html.size());
|
||||
tidyResponseCode = tidyParseBuffer(tidyDoc, &tidyOutputBuffer);
|
||||
tidyResponseCode = tidyParseString(tidyDoc, html.c_str());
|
||||
}
|
||||
|
||||
// Process HTML
|
||||
if (tidyResponseCode >= 0)
|
||||
tidyResponseCode = tidyCleanAndRepair(tidyDoc);
|
||||
|
||||
if (tidyResponseCode >= 0)
|
||||
tidyResponseCode = tidyRunDiagnostics(tidyDoc);
|
||||
if (tidyResponseCode > 1)
|
||||
tidyResponseCode = (tidyOptSetBool(tidyDoc, TidyForceOutput, yes) ? tidyResponseCode : -1);
|
||||
|
||||
// Output the HTML to our buffer
|
||||
if (tidyResponseCode >= 0)
|
||||
tidyResponseCode = tidySaveBuffer(tidyDoc, &tidyOutputBuffer);
|
||||
|
|
Loading…
Reference in a new issue