Add an option to customize the user agent at runtime.

This commit is contained in:
King_DuckZ 2015-09-30 01:27:28 +02:00
parent c947eab83f
commit bf3b85498b
2 changed files with 8 additions and 4 deletions

View file

@ -28,13 +28,17 @@ namespace duck {
("dump,d", po::value<std::string>(), "Cleans the retrieved html and saves it to the named file; use - for stdout") ("dump,d", po::value<std::string>(), "Cleans the retrieved html and saves it to the named file; use - for stdout")
("dump-raw,D", po::value<std::string>(), "Saves the retrieved html to the named file; use - for stdout") ("dump-raw,D", po::value<std::string>(), "Saves the retrieved html to the named file; use - for stdout")
; ;
po::options_description query_options("Query options");
query_options.add_options()
("agent", po::value<std::string>()->default_value(DEFAULT_USER_AGENT), "User agent that will be passed to the server")
;
po::options_description positional_options("Positional options"); po::options_description positional_options("Positional options");
positional_options.add_options() positional_options.add_options()
("input-url", po::value<std::string>(), "Input URL") ("input-url", po::value<std::string>(), "Input URL")
("xpath", po::value<std::string>(), "XPath expression") ("xpath", po::value<std::string>(), "XPath expression")
; ;
po::options_description all("Available options"); po::options_description all("Available options");
all.add(desc).add(positional_options); all.add(desc).add(positional_options).add(query_options);
po::positional_options_description pd; po::positional_options_description pd;
pd.add("input-url", 1).add("xpath", 1); pd.add("input-url", 1).add("xpath", 1);
try { try {
@ -48,7 +52,7 @@ namespace duck {
if (parVarMap.count("help")) { if (parVarMap.count("help")) {
po::options_description visible("Available options"); po::options_description visible("Available options");
visible.add(desc); visible.add(desc).add(query_options);
std::cout << "Usage: " << PROGRAM_NAME << " [options...] <url> <xpath>\n"; std::cout << "Usage: " << PROGRAM_NAME << " [options...] <url> <xpath>\n";
std::cout << "You can pass - as the url to read from stdin\n"; std::cout << "You can pass - as the url to read from stdin\n";
std::cout << visible; std::cout << visible;

View file

@ -1,6 +1,5 @@
#include "htmlretrieve.hpp" #include "htmlretrieve.hpp"
#include "commandline.hpp" #include "commandline.hpp"
#include "duckscraperConfig.h"
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <pugixml.hpp> #include <pugixml.hpp>
@ -38,12 +37,13 @@ int main (int argc, char* argv[]) {
#if !defined(NDEBUG) #if !defined(NDEBUG)
std::cout << "URL : " << url << "\n"; std::cout << "URL : " << url << "\n";
std::cout << "XPath: " << xpath << std::endl; std::cout << "XPath: " << xpath << std::endl;
std::cout << "Agent: " << vm["agent"].as<std::string>() << std::endl;
#endif #endif
std::string html; std::string html;
if ("-" != url) { if ("-" != url) {
html = duck::fetch_html(url, DEFAULT_USER_AGENT, false, false); html = duck::fetch_html(url, vm["agent"].as<std::string>(), false, false);
} }
else { else {
std::cin >> std::noskipws; std::cin >> std::noskipws;