Parse options through boost program_options.
This commit is contained in:
parent
4f85fa01a9
commit
8e517e5de9
2 changed files with 83 additions and 23 deletions
|
@ -1,19 +1,15 @@
|
|||
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
|
||||
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
|
||||
project(duckscraper CXX)
|
||||
|
||||
add_subdirectory(lib/tidy)
|
||||
add_subdirectory(lib/curlcpp)
|
||||
project(duckscraper VERSION 0.2 LANGUAGES CXX)
|
||||
|
||||
include(GetGitRevisionDescription)
|
||||
find_package(PugiXML REQUIRED)
|
||||
find_package(Boost 1.32.0 COMPONENTS program_options)
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++11 -Wall -Wextra -g -O0 -fno-omit-frame-pointer")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++11 -Wall -Wextra -g -O3 -fomit-frame-pointer")
|
||||
|
||||
set(DEFAULT_USER_AGENT "DuckScraper")
|
||||
set(PROJECT_VERSION_MAJOR "0")
|
||||
set(PROJECT_VERSION_MINOR "1")
|
||||
set(PROJECT_VERSION_BETA "1")
|
||||
get_git_head_revision(GIT_REFSPEC PROJECT_VERSION_GIT)
|
||||
|
||||
|
@ -25,7 +21,8 @@ configure_file(
|
|||
include_directories(SYSTEM
|
||||
lib/tidy/include
|
||||
${PUGIXML_INCLUDE_DIR}
|
||||
${CURLCPP_SOURCE_DIR}/include
|
||||
lib/curlcpp/include
|
||||
${Boost_INCLUDE_DIRS}
|
||||
)
|
||||
include_directories(
|
||||
src/
|
||||
|
@ -41,4 +38,15 @@ target_link_libraries(${PROJECT_NAME}
|
|||
tidy-share
|
||||
${PUGIXML_LIBRARIES}
|
||||
curlcpp
|
||||
${Boost_LIBRARIES}
|
||||
)
|
||||
|
||||
#unset those variables so cmake files from dependencies won't complain about
|
||||
#new/old policy. Those unset statements can be removed once both libraries bump
|
||||
#their cmake_minimum_required to 3.0+.
|
||||
unset(PROJECT_VERSION_MAJOR)
|
||||
unset(PROJECT_VERSION_MINOR)
|
||||
unset(PROJECT_VERSION)
|
||||
set(BUILD_SHARED_LIB ON) #for tidy
|
||||
add_subdirectory(lib/tidy)
|
||||
add_subdirectory(lib/curlcpp)
|
||||
|
|
82
src/main.cpp
82
src/main.cpp
|
@ -6,32 +6,84 @@
|
|||
#include <sstream>
|
||||
#include <utility>
|
||||
#include <ciso646>
|
||||
#include <boost/program_options.hpp>
|
||||
#include <unistd.h>
|
||||
|
||||
#define STRINGIZE_IMPL(s) #s
|
||||
#define STRINGIZE(s) STRINGIZE_IMPL(s)
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
namespace {
|
||||
typedef std::pair<int, int> LineColType;
|
||||
|
||||
LineColType line_col_from_offset ( ptrdiff_t parOffset, const std::string& parData );
|
||||
|
||||
// Version banner assembled at compile time by string-literal concatenation:
// PROGRAM_NAME, " v", then the stringized VERSION_MAJOR and VERSION_MINOR
// macros separated by a dot. A trailing "b" is appended only when the
// VERSION_BETA macro evaluates to non-zero in the preprocessor.
const char* const g_version_string =
|
||||
PROGRAM_NAME " v" STRINGIZE(VERSION_MAJOR) "." STRINGIZE(VERSION_MINOR)
|
||||
#if VERSION_BETA
|
||||
"b"
|
||||
#endif
|
||||
;
|
||||
|
||||
bool parse_commandline (int parArgc, char* parArgv[], po::variables_map parVarMap) {
|
||||
po::options_description desc("General");
|
||||
desc.add_options()
|
||||
("help,h", "Produces this help message")
|
||||
("version", "Prints the program's version and quits")
|
||||
;
|
||||
po::options_description positional_options("Positional options");
|
||||
positional_options.add_options()
|
||||
("input-url", po::value<std::string>(), "Input URL")
|
||||
("xpath", po::value<std::string>(), "XPath expression")
|
||||
;
|
||||
po::options_description all("Available options");
|
||||
all.add(desc).add(positional_options);
|
||||
po::positional_options_description pd;
|
||||
pd.add("input-url", 1).add("xpath", 1);
|
||||
po::store(po::command_line_parser(parArgc, parArgv).options(all).positional(pd).run(), parVarMap);
|
||||
po::notify(parVarMap);
|
||||
|
||||
if (parVarMap.count("help")) {
|
||||
po::options_description visible("Available options");
|
||||
visible.add(desc);
|
||||
std::cout << "Usage: " << PROGRAM_NAME << " [options...] <url> <xpath>\n";
|
||||
std::cout << visible;
|
||||
return true;
|
||||
}
|
||||
else if (parVarMap.count("version")) {
|
||||
std::cout << g_version_string;
|
||||
std::cout << " git revision " << VERSION_GIT << "\n";
|
||||
return true;
|
||||
}
|
||||
|
||||
if (parVarMap.count("input-url") == 0) {
|
||||
std::cerr << "No input url specified, use --help for help" << std::endl;
|
||||
//return 2;
|
||||
return true;
|
||||
}
|
||||
if (parVarMap.count("xpath") == 0) {
|
||||
std::cerr << "No XPath expression specified, use --help for help" << std::endl;
|
||||
//return 2;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} //unnamed namespace
|
||||
|
||||
int main (int argc, char* argv[]) {
|
||||
if (argc != 3) {
|
||||
std::cerr << PROGRAM_NAME << "v" << VERSION_MAJOR << "." << VERSION_MINOR;
|
||||
#if VERSION_BETA
|
||||
std::cerr << "b";
|
||||
#endif
|
||||
std::cerr << " git revision " << VERSION_GIT << "\n";
|
||||
std::cerr << "Default user agent is \"" << DEFAULT_USER_AGENT << "\"\n";
|
||||
std::cerr << "Usage: scraper <URL> <XPath>" << std::endl;
|
||||
return 2;
|
||||
po::variables_map vm;
|
||||
if (parse_commandline(argc, argv, vm)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char* const& url = argv[1];
|
||||
const char* const& xpath = argv[2];
|
||||
|
||||
const auto url = vm["input-url"].as<std::string>();
|
||||
const auto xpath = vm["xpath"].as<std::string>();
|
||||
#if !defined(NDEBUG)
|
||||
std::cout << "URL : " << url << "\n";
|
||||
std::cout << "XPath: " << xpath << std::endl;
|
||||
#endif
|
||||
|
||||
std::string tidyHtml = duck::getCleanHtml(url, false, false);
|
||||
std::string tidyHtml = duck::getCleanHtml(vm["input-url"].as<std::string>(), false, false);
|
||||
|
||||
{
|
||||
pugi::xml_document doc;
|
||||
|
@ -45,7 +97,7 @@ int main (int argc, char* argv[]) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
pugi::xpath_node_set xpathRes = doc.select_nodes(xpath);
|
||||
pugi::xpath_node_set xpathRes = doc.select_nodes(xpath.c_str());
|
||||
for (pugi::xpath_node_set::const_iterator itFind(xpathRes.begin()), itFindEND(xpathRes.end()); itFind != itFindEND; ++itFind) {
|
||||
const pugi::xpath_node& node = *itFind;
|
||||
if (node.node()) {
|
||||
|
|
Loading…
Reference in a new issue