Fix the HTML cleaning code, which was not actually cleaning anything.

This commit is contained in:
King_DuckZ 2015-09-28 22:59:09 +02:00
parent 8e517e5de9
commit 00150938dd

View file

@ -29,13 +29,10 @@ namespace duck {
std::string cleanHTML (std::string&& html) { std::string cleanHTML (std::string&& html) {
dropScriptTags(html); dropScriptTags(html);
std::unique_ptr<char[]> html_copy(new char[html.size()]);
std::copy(html.begin(), html.end(), html_copy.get());
// Initialize a Tidy document // Initialize a Tidy document
TidyDoc tidyDoc = tidyCreate(); TidyDoc tidyDoc = tidyCreate();
TidyBuffer tidyOutputBuffer; TidyBuffer tidyOutputBuffer = {nullptr, nullptr, 0, 0, 0};
tidyBufInit(&tidyOutputBuffer);
// Configure Tidy // Configure Tidy
// The flags tell Tidy to output XML and disable showing warnings // The flags tell Tidy to output XML and disable showing warnings
@ -48,14 +45,18 @@ namespace duck {
// Parse input // Parse input
if (configSuccess) { if (configSuccess) {
tidyBufAppend(&tidyOutputBuffer, html_copy.get(), html.size()); tidyResponseCode = tidyParseString(tidyDoc, html.c_str());
tidyResponseCode = tidyParseBuffer(tidyDoc, &tidyOutputBuffer);
} }
// Process HTML // Process HTML
if (tidyResponseCode >= 0) if (tidyResponseCode >= 0)
tidyResponseCode = tidyCleanAndRepair(tidyDoc); tidyResponseCode = tidyCleanAndRepair(tidyDoc);
if (tidyResponseCode >= 0)
tidyResponseCode = tidyRunDiagnostics(tidyDoc);
if (tidyResponseCode > 1)
tidyResponseCode = (tidyOptSetBool(tidyDoc, TidyForceOutput, yes) ? tidyResponseCode : -1);
// Output the HTML to our buffer // Output the HTML to our buffer
if (tidyResponseCode >= 0) if (tidyResponseCode >= 0)
tidyResponseCode = tidySaveBuffer(tidyDoc, &tidyOutputBuffer); tidyResponseCode = tidySaveBuffer(tidyDoc, &tidyOutputBuffer);