Fix the HTML cleaning code, which was not actually cleaning anything.

This commit is contained in:
King_DuckZ 2015-09-28 22:59:09 +02:00
parent 8e517e5de9
commit 00150938dd

View file

@ -29,13 +29,10 @@ namespace duck {
std::string cleanHTML (std::string&& html) {
dropScriptTags(html);
std::unique_ptr<char[]> html_copy(new char[html.size()]);
std::copy(html.begin(), html.end(), html_copy.get());
// Initialize a Tidy document
TidyDoc tidyDoc = tidyCreate();
TidyBuffer tidyOutputBuffer;
tidyBufInit(&tidyOutputBuffer);
TidyBuffer tidyOutputBuffer = {nullptr, nullptr, 0, 0, 0};
// Configure Tidy
// The flags tell Tidy to output XML and disable showing warnings
@ -48,14 +45,18 @@ namespace duck {
// Parse input
if (configSuccess) {
tidyBufAppend(&tidyOutputBuffer, html_copy.get(), html.size());
tidyResponseCode = tidyParseBuffer(tidyDoc, &tidyOutputBuffer);
tidyResponseCode = tidyParseString(tidyDoc, html.c_str());
}
// Process HTML
if (tidyResponseCode >= 0)
tidyResponseCode = tidyCleanAndRepair(tidyDoc);
if (tidyResponseCode >= 0)
tidyResponseCode = tidyRunDiagnostics(tidyDoc);
if (tidyResponseCode > 1)
tidyResponseCode = (tidyOptSetBool(tidyDoc, TidyForceOutput, yes) ? tidyResponseCode : -1);
// Output the HTML to our buffer
if (tidyResponseCode >= 0)
tidyResponseCode = tidySaveBuffer(tidyDoc, &tidyOutputBuffer);