Allow piping.

At the moment you still need to pass some value for the URL parameter
even when the input is piped in and the URL is not used. The good news
is that the value doesn't have to be a valid URL, so any string will do.
This commit is contained in:
King_DuckZ 2015-09-28 23:37:42 +02:00
parent 943e760ffd
commit 49aa62815a
3 changed files with 77 additions and 56 deletions

View file

@ -28,6 +28,16 @@ namespace duck {
}
}
/// Tells whether parUrl begins with the "https://" scheme prefix.
///
/// The match ignores ASCII letter case, because URI schemes are
/// case-insensitive (RFC 3986, section 3.1): "HTTPS://host" is https too.
///
/// @param parUrl string to inspect; it does not need to be a valid URL.
/// @return true if parUrl starts with "https://" in any letter case, false
///         otherwise (including when parUrl is shorter than the prefix).
bool isHttps (const std::string& parUrl) {
	const char protocol[] = "https://";
	//sizeof counts the terminating NUL, which is not part of the prefix
	const size_t protocolLen = sizeof(protocol) / sizeof(protocol[0]) - 1;
	if (parUrl.size() < protocolLen)
		return false;

	//protocol is already lower case, so only the input char is folded; the
	//folding is done by hand to stay independent from the current locale
	return std::equal(protocol, protocol + protocolLen, parUrl.begin(),
		[](char parExpected, char parActual) {
			const char lowered = (parActual >= 'A' && parActual <= 'Z' ?
				static_cast<char>(parActual - 'A' + 'a')
				:
				parActual
			);
			return parExpected == lowered;
		}
	);
}
} //unnamed namespace
std::string cleanHTML (std::string&& html) {
dropScriptTags(html);
@ -74,16 +84,6 @@ namespace duck {
return tidyResult;
}
/// Tells whether parUrl begins with the "https://" scheme prefix.
///
/// The match ignores ASCII letter case, because URI schemes are
/// case-insensitive (RFC 3986, section 3.1): "HTTPS://host" is https too.
///
/// @param parUrl string to inspect; it does not need to be a valid URL.
/// @return true if parUrl starts with "https://" in any letter case, false
///         otherwise (including when parUrl is shorter than the prefix).
bool isHttps (const std::string& parUrl) {
	const char protocol[] = "https://";
	//sizeof counts the terminating NUL, which is not part of the prefix
	const size_t protocolLen = sizeof(protocol) / sizeof(protocol[0]) - 1;
	if (parUrl.size() < protocolLen)
		return false;

	//protocol is already lower case, so only the input char is folded; the
	//folding is done by hand to stay independent from the current locale
	return std::equal(protocol, protocol + protocolLen, parUrl.begin(),
		[](char parExpected, char parActual) {
			const char lowered = (parActual >= 'A' && parActual <= 'Z' ?
				static_cast<char>(parActual - 'A' + 'a')
				:
				parActual
			);
			return parExpected == lowered;
		}
	);
}
} //unnamed namespace
std::string getCleanHtml (const std::string& parSource, const std::string& parUserAgent, bool parSslVerifyPeer, bool parSslVerifyHost, DumpRawFunc parDumpRaw) {
using curl::curl_easy;
using curl::curl_pair;

View file

@ -8,6 +8,8 @@ namespace duck {
//Callback receiving a string; passed to getCleanHtml() as parDumpRaw.
//NOTE(review): presumably it is handed the raw, not yet cleaned, content - confirm in the implementation.
using DumpRawFunc = std::function<void(const std::string&)>;
//Returns the cleaned HTML obtained from parSource, identifying as parUserAgent.
//parSslVerifyPeer/parSslVerifyHost: presumably toggle the matching curl SSL checks - confirm.
//parDumpRaw defaults to an empty (default-constructed) callback.
std::string getCleanHtml ( const std::string& parSource, const std::string& parUserAgent, bool parSslVerifyPeer, bool parSslVerifyHost, DumpRawFunc parDumpRaw=DumpRawFunc() );
//Same as above, without the parUserAgent parameter.
//NOTE(review): which user agent is used in this case is not visible here - confirm.
std::string getCleanHtml ( const std::string& parSource, bool parSslVerifyPeer, bool parSslVerifyHost, DumpRawFunc parDumpRaw=DumpRawFunc() );
//Cleans HTML that is already in memory; html is taken by rvalue reference, so
//callers pass a temporary or std::move() their buffer.
std::string cleanHTML ( std::string&& html );
} //namespace duck
#endif

View file

@ -10,6 +10,8 @@
#include <boost/program_options.hpp>
#include <memory>
#include <functional>
#include <unistd.h>
#include <iterator>
#define STRINGIZE_IMPL(s) #s
#define STRINGIZE(s) STRINGIZE_IMPL(s)
@ -83,6 +85,32 @@ namespace {
std::ostream* const os = (use_stdout ? &std::cout : ofs.get());
*os << parData;
}
///Produces the cleaned HTML the rest of the program works on.
///
///When stdin is a terminal the content is obtained through
///duck::getCleanHtml() using parUrl. Otherwise (input piped or redirected)
///the whole of stdin is read and handed to duck::cleanHTML(), and parUrl is
///not used at all.
///
///@param parUrl source passed on to duck::getCleanHtml(); ignored when piping.
///@param parVarMap parsed command line options; when "dump-raw" is present
///       its string value is bound as the first argument of dump_string and
///       the resulting callable is given to duck::getCleanHtml().
///@return the string returned by whichever duck:: function was invoked.
std::string getCleanHtml (const std::string& parUrl, const po::variables_map& parVarMap) {
	if (isatty(fileno(stdin))) {
		//Stays empty unless the user asked for a raw dump
		duck::DumpRawFunc dumpRaw;
		if (parVarMap.count("dump-raw")) {
			dumpRaw = std::bind(&dump_string, parVarMap["dump-raw"].as<std::string>(), std::placeholders::_1);
		}

		//The two false values are parSslVerifyPeer and parSslVerifyHost
		return duck::getCleanHtml(parUrl, false, false, std::move(dumpRaw));
	}

	//istreambuf_iterator does unformatted reads, so whitespace is preserved
	//without having to switch std::cin to noskipws, and there is no
	//per-character formatted extraction overhead
	std::string piped(
		(std::istreambuf_iterator<char>(std::cin)),
		std::istreambuf_iterator<char>()
	);
	//Returning the prvalue directly; wrapping a local in std::move on return
	//would only inhibit copy elision
	return duck::cleanHTML(std::move(piped));
}
} //unnamed namespace
int main (int argc, char* argv[]) {
@ -97,16 +125,7 @@ int main (int argc, char* argv[]) {
std::cout << "XPath: " << xpath << std::endl;
#endif
const std::string tidyHtml = duck::getCleanHtml(
url,
false,
false,
(vm.count("dump-raw") ?
std::bind(&dump_string, vm["dump-raw"].as<std::string>(), std::placeholders::_1)
:
duck::DumpRawFunc()
)
);
const std::string tidyHtml(getCleanHtml(url, vm));
if (vm.count("dump")) {
dump_string(vm["dump"].as<std::string>(), tidyHtml);
}