Parse options through boost program_options.

This commit is contained in:
King_DuckZ 2015-09-28 21:48:46 +02:00
parent 4f85fa01a9
commit 8e517e5de9
2 changed files with 83 additions and 23 deletions

View file

@ -1,19 +1,15 @@
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
project(duckscraper CXX)
add_subdirectory(lib/tidy)
add_subdirectory(lib/curlcpp)
project(duckscraper VERSION 0.2 LANGUAGES CXX)
include(GetGitRevisionDescription)
find_package(PugiXML REQUIRED)
#Boost.Program_options is mandatory: the sources include its header and the
#target links ${Boost_LIBRARIES}, so fail at configure time if it is missing
#instead of failing later with a compile or link error.
find_package(Boost 1.32.0 REQUIRED COMPONENTS program_options)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++11 -Wall -Wextra -g -O0 -fno-omit-frame-pointer")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++11 -Wall -Wextra -g -O3 -fomit-frame-pointer")
set(DEFAULT_USER_AGENT "DuckScraper")
set(PROJECT_VERSION_MAJOR "0")
set(PROJECT_VERSION_MINOR "1")
set(PROJECT_VERSION_BETA "1")
get_git_head_revision(GIT_REFSPEC PROJECT_VERSION_GIT)
@ -25,7 +21,8 @@ configure_file(
include_directories(SYSTEM
lib/tidy/include
${PUGIXML_INCLUDE_DIR}
${CURLCPP_SOURCE_DIR}/include
lib/curlcpp/include
${Boost_INCLUDE_DIRS}
)
include_directories(
src/
@ -41,4 +38,15 @@ target_link_libraries(${PROJECT_NAME}
tidy-share
${PUGIXML_LIBRARIES}
curlcpp
${Boost_LIBRARIES}
)
#Unset these variables so that the CMake files of the dependencies added below
#don't complain about the new/old behaviour of policy CMP0048 (project()
#managing the VERSION variables). These unset() statements can be removed once
#both libraries bump their cmake_minimum_required to 3.0+.
unset(PROJECT_VERSION_MAJOR)
unset(PROJECT_VERSION_MINOR)
unset(PROJECT_VERSION)
set(BUILD_SHARED_LIB ON) #for tidy
add_subdirectory(lib/tidy)
add_subdirectory(lib/curlcpp)

View file

@ -6,32 +6,84 @@
#include <sstream>
#include <utility>
#include <ciso646>
#include <boost/program_options.hpp>
#include <unistd.h>
//Turns the *expansion* of a macro argument into a string literal.
//The extra level of indirection is needed because the # operator does not
//macro-expand its operand: STRINGIZE(s) first expands s, then hands the result
//to STRINGIZE_IMPL, which applies # to it.
#define STRINGIZE_IMPL(s) #s
#define STRINGIZE(s) STRINGIZE_IMPL(s)
namespace po = boost::program_options;
namespace {
typedef std::pair<int, int> LineColType;
LineColType line_col_from_offset ( ptrdiff_t parOffset, const std::string& parData );
//Program name and version, assembled at compile time through adjacent string
//literal concatenation: PROGRAM_NAME, " v", then the stringized VERSION_MAJOR
//and VERSION_MINOR separated by a dot. A trailing "b" is appended when
//VERSION_BETA evaluates to non-zero in the preprocessor.
//PROGRAM_NAME is expected to expand to a string literal (it is defined outside
//this file, presumably by the build system).
const char* const g_version_string =
PROGRAM_NAME " v" STRINGIZE(VERSION_MAJOR) "." STRINGIZE(VERSION_MINOR)
#if VERSION_BETA
"b"
#endif
;
bool parse_commandline (int parArgc, char* parArgv[], po::variables_map parVarMap) {
po::options_description desc("General");
desc.add_options()
("help,h", "Produces this help message")
("version", "Prints the program's version and quits")
;
po::options_description positional_options("Positional options");
positional_options.add_options()
("input-url", po::value<std::string>(), "Input URL")
("xpath", po::value<std::string>(), "XPath expression")
;
po::options_description all("Available options");
all.add(desc).add(positional_options);
po::positional_options_description pd;
pd.add("input-url", 1).add("xpath", 1);
po::store(po::command_line_parser(parArgc, parArgv).options(all).positional(pd).run(), parVarMap);
po::notify(parVarMap);
if (parVarMap.count("help")) {
po::options_description visible("Available options");
visible.add(desc);
std::cout << "Usage: " << PROGRAM_NAME << " [options...] <url> <xpath>\n";
std::cout << visible;
return true;
}
else if (parVarMap.count("version")) {
std::cout << g_version_string;
std::cout << " git revision " << VERSION_GIT << "\n";
return true;
}
if (parVarMap.count("input-url") == 0) {
std::cerr << "No input url specified, use --help for help" << std::endl;
//return 2;
return true;
}
if (parVarMap.count("xpath") == 0) {
std::cerr << "No XPath expression specified, use --help for help" << std::endl;
//return 2;
return true;
}
return false;
}
} //unnamed namespace
int main (int argc, char* argv[]) {
if (argc != 3) {
std::cerr << PROGRAM_NAME << "v" << VERSION_MAJOR << "." << VERSION_MINOR;
#if VERSION_BETA
std::cerr << "b";
#endif
std::cerr << " git revision " << VERSION_GIT << "\n";
std::cerr << "Default user agent is \"" << DEFAULT_USER_AGENT << "\"\n";
std::cerr << "Usage: scraper <URL> <XPath>" << std::endl;
return 2;
po::variables_map vm;
if (parse_commandline(argc, argv, vm)) {
return 0;
}
const char* const& url = argv[1];
const char* const& xpath = argv[2];
const auto url = vm["input-url"].as<std::string>();
const auto xpath = vm["xpath"].as<std::string>();
#if !defined(NDEBUG)
std::cout << "URL : " << url << "\n";
std::cout << "XPath: " << xpath << std::endl;
#endif
std::string tidyHtml = duck::getCleanHtml(url, false, false);
std::string tidyHtml = duck::getCleanHtml(vm["input-url"].as<std::string>(), false, false);
{
pugi::xml_document doc;
@ -45,7 +97,7 @@ int main (int argc, char* argv[]) {
return 1;
}
pugi::xpath_node_set xpathRes = doc.select_nodes(xpath);
pugi::xpath_node_set xpathRes = doc.select_nodes(xpath.c_str());
for (pugi::xpath_node_set::const_iterator itFind(xpathRes.begin()), itFindEND(xpathRes.end()); itFind != itFindEND; ++itFind) {
const pugi::xpath_node& node = *itFind;
if (node.node()) {