Add an option to customize the user agent at runtime.
This commit is contained in:
parent
c947eab83f
commit
bf3b85498b
2 changed files with 8 additions and 4 deletions
|
@ -28,13 +28,17 @@ namespace duck {
|
||||||
("dump,d", po::value<std::string>(), "Cleans the retrieved html and saves it to the named file; use - for stdout")
|
("dump,d", po::value<std::string>(), "Cleans the retrieved html and saves it to the named file; use - for stdout")
|
||||||
("dump-raw,D", po::value<std::string>(), "Saves the retrieved html to the named file; use - for stdout")
|
("dump-raw,D", po::value<std::string>(), "Saves the retrieved html to the named file; use - for stdout")
|
||||||
;
|
;
|
||||||
|
po::options_description query_options("Query options");
|
||||||
|
query_options.add_options()
|
||||||
|
("agent", po::value<std::string>()->default_value(DEFAULT_USER_AGENT), "User agent that will be passed to the server")
|
||||||
|
;
|
||||||
po::options_description positional_options("Positional options");
|
po::options_description positional_options("Positional options");
|
||||||
positional_options.add_options()
|
positional_options.add_options()
|
||||||
("input-url", po::value<std::string>(), "Input URL")
|
("input-url", po::value<std::string>(), "Input URL")
|
||||||
("xpath", po::value<std::string>(), "XPath expression")
|
("xpath", po::value<std::string>(), "XPath expression")
|
||||||
;
|
;
|
||||||
po::options_description all("Available options");
|
po::options_description all("Available options");
|
||||||
all.add(desc).add(positional_options);
|
all.add(desc).add(positional_options).add(query_options);
|
||||||
po::positional_options_description pd;
|
po::positional_options_description pd;
|
||||||
pd.add("input-url", 1).add("xpath", 1);
|
pd.add("input-url", 1).add("xpath", 1);
|
||||||
try {
|
try {
|
||||||
|
@ -48,7 +52,7 @@ namespace duck {
|
||||||
|
|
||||||
if (parVarMap.count("help")) {
|
if (parVarMap.count("help")) {
|
||||||
po::options_description visible("Available options");
|
po::options_description visible("Available options");
|
||||||
visible.add(desc);
|
visible.add(desc).add(query_options);
|
||||||
std::cout << "Usage: " << PROGRAM_NAME << " [options...] <url> <xpath>\n";
|
std::cout << "Usage: " << PROGRAM_NAME << " [options...] <url> <xpath>\n";
|
||||||
std::cout << "You can pass - as the url to read from stdin\n";
|
std::cout << "You can pass - as the url to read from stdin\n";
|
||||||
std::cout << visible;
|
std::cout << visible;
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
#include "htmlretrieve.hpp"
|
#include "htmlretrieve.hpp"
|
||||||
#include "commandline.hpp"
|
#include "commandline.hpp"
|
||||||
#include "duckscraperConfig.h"
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <pugixml.hpp>
|
#include <pugixml.hpp>
|
||||||
|
@ -38,12 +37,13 @@ int main (int argc, char* argv[]) {
|
||||||
#if !defined(NDEBUG)
|
#if !defined(NDEBUG)
|
||||||
std::cout << "URL : " << url << "\n";
|
std::cout << "URL : " << url << "\n";
|
||||||
std::cout << "XPath: " << xpath << std::endl;
|
std::cout << "XPath: " << xpath << std::endl;
|
||||||
|
std::cout << "Agent: " << vm["agent"].as<std::string>() << std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
std::string html;
|
std::string html;
|
||||||
|
|
||||||
if ("-" != url) {
|
if ("-" != url) {
|
||||||
html = duck::fetch_html(url, DEFAULT_USER_AGENT, false, false);
|
html = duck::fetch_html(url, vm["agent"].as<std::string>(), false, false);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
std::cin >> std::noskipws;
|
std::cin >> std::noskipws;
|
||||||
|
|
Loading…
Reference in a new issue