Add an option to customize the user agent at runtime.
This commit is contained in:
parent
c947eab83f
commit
bf3b85498b
2 changed files with 8 additions and 4 deletions
|
@ -28,13 +28,17 @@ namespace duck {
|
|||
("dump,d", po::value<std::string>(), "Cleans the retrieved html and saves it to the named file; use - for stdout")
|
||||
("dump-raw,D", po::value<std::string>(), "Saves the retrieved html to the named file; use - for stdout")
|
||||
;
|
||||
po::options_description query_options("Query options");
|
||||
query_options.add_options()
|
||||
("agent", po::value<std::string>()->default_value(DEFAULT_USER_AGENT), "User agent that will be passed to the server")
|
||||
;
|
||||
po::options_description positional_options("Positional options");
|
||||
positional_options.add_options()
|
||||
("input-url", po::value<std::string>(), "Input URL")
|
||||
("xpath", po::value<std::string>(), "XPath expression")
|
||||
;
|
||||
po::options_description all("Available options");
|
||||
all.add(desc).add(positional_options);
|
||||
all.add(desc).add(positional_options).add(query_options);
|
||||
po::positional_options_description pd;
|
||||
pd.add("input-url", 1).add("xpath", 1);
|
||||
try {
|
||||
|
@ -48,7 +52,7 @@ namespace duck {
|
|||
|
||||
if (parVarMap.count("help")) {
|
||||
po::options_description visible("Available options");
|
||||
visible.add(desc);
|
||||
visible.add(desc).add(query_options);
|
||||
std::cout << "Usage: " << PROGRAM_NAME << " [options...] <url> <xpath>\n";
|
||||
std::cout << "You can pass - as the url to read from stdin\n";
|
||||
std::cout << visible;
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#include "htmlretrieve.hpp"
|
||||
#include "commandline.hpp"
|
||||
#include "duckscraperConfig.h"
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <pugixml.hpp>
|
||||
|
@ -38,12 +37,13 @@ int main (int argc, char* argv[]) {
|
|||
#if !defined(NDEBUG)
|
||||
std::cout << "URL : " << url << "\n";
|
||||
std::cout << "XPath: " << xpath << std::endl;
|
||||
std::cout << "Agent: " << vm["agent"].as<std::string>() << std::endl;
|
||||
#endif
|
||||
|
||||
std::string html;
|
||||
|
||||
if ("-" != url) {
|
||||
html = duck::fetch_html(url, DEFAULT_USER_AGENT, false, false);
|
||||
html = duck::fetch_html(url, vm["agent"].as<std::string>(), false, false);
|
||||
}
|
||||
else {
|
||||
std::cin >> std::noskipws;
|
||||
|
|
Loading…
Reference in a new issue