Parse options through boost program_options.
This commit is contained in:
parent
4f85fa01a9
commit
8e517e5de9
2 changed files with 83 additions and 23 deletions
|
@ -1,19 +1,15 @@
|
||||||
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
|
||||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
|
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
|
||||||
project(duckscraper CXX)
|
project(duckscraper VERSION 0.2 LANGUAGES CXX)
|
||||||
|
|
||||||
add_subdirectory(lib/tidy)
|
|
||||||
add_subdirectory(lib/curlcpp)
|
|
||||||
|
|
||||||
include(GetGitRevisionDescription)
|
include(GetGitRevisionDescription)
|
||||||
find_package(PugiXML REQUIRED)
|
find_package(PugiXML REQUIRED)
|
||||||
|
#program_options is included and linked unconditionally by this project, so
#stop at configure time when it cannot be found instead of failing later.
find_package(Boost 1.32.0 REQUIRED COMPONENTS program_options)
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++11 -Wall -Wextra -g -O0 -fno-omit-frame-pointer")
|
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++11 -Wall -Wextra -g -O0 -fno-omit-frame-pointer")
|
||||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++11 -Wall -Wextra -g -O3 -fomit-frame-pointer")
|
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++11 -Wall -Wextra -g -O3 -fomit-frame-pointer")
|
||||||
|
|
||||||
set(DEFAULT_USER_AGENT "DuckScraper")
|
set(DEFAULT_USER_AGENT "DuckScraper")
|
||||||
set(PROJECT_VERSION_MAJOR "0")
|
|
||||||
set(PROJECT_VERSION_MINOR "1")
|
|
||||||
set(PROJECT_VERSION_BETA "1")
|
set(PROJECT_VERSION_BETA "1")
|
||||||
get_git_head_revision(GIT_REFSPEC PROJECT_VERSION_GIT)
|
get_git_head_revision(GIT_REFSPEC PROJECT_VERSION_GIT)
|
||||||
|
|
||||||
|
@ -25,7 +21,8 @@ configure_file(
|
||||||
include_directories(SYSTEM
|
include_directories(SYSTEM
|
||||||
lib/tidy/include
|
lib/tidy/include
|
||||||
${PUGIXML_INCLUDE_DIR}
|
${PUGIXML_INCLUDE_DIR}
|
||||||
${CURLCPP_SOURCE_DIR}/include
|
lib/curlcpp/include
|
||||||
|
${Boost_INCLUDE_DIRS}
|
||||||
)
|
)
|
||||||
include_directories(
|
include_directories(
|
||||||
src/
|
src/
|
||||||
|
@ -41,4 +38,15 @@ target_link_libraries(${PROJECT_NAME}
|
||||||
tidy-share
|
tidy-share
|
||||||
${PUGIXML_LIBRARIES}
|
${PUGIXML_LIBRARIES}
|
||||||
curlcpp
|
curlcpp
|
||||||
|
${Boost_LIBRARIES}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
#unset those variables so cmake files from dependencies won't complain about
|
||||||
|
#new/old policy. Those unset statements can be removed once both libraries bump
|
||||||
|
#their cmake_minimum_required to 3.0+.
|
||||||
|
unset(PROJECT_VERSION_MAJOR)
|
||||||
|
unset(PROJECT_VERSION_MINOR)
|
||||||
|
unset(PROJECT_VERSION)
|
||||||
|
set(BUILD_SHARED_LIB ON) #for tidy
|
||||||
|
add_subdirectory(lib/tidy)
|
||||||
|
add_subdirectory(lib/curlcpp)
|
||||||
|
|
82
src/main.cpp
82
src/main.cpp
|
@ -6,32 +6,84 @@
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <ciso646>
|
#include <ciso646>
|
||||||
|
#include <boost/program_options.hpp>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#define STRINGIZE_IMPL(s) #s
|
||||||
|
#define STRINGIZE(s) STRINGIZE_IMPL(s)
|
||||||
|
|
||||||
|
namespace po = boost::program_options;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
typedef std::pair<int, int> LineColType;
|
typedef std::pair<int, int> LineColType;
|
||||||
|
|
||||||
LineColType line_col_from_offset ( ptrdiff_t parOffset, const std::string& parData );
|
LineColType line_col_from_offset ( ptrdiff_t parOffset, const std::string& parData );
|
||||||
|
|
||||||
|
//Human readable "<program> v<major>.<minor>" string, built entirely by the
//preprocessor; a trailing "b" is appended when VERSION_BETA is non-zero.
#if VERSION_BETA
const char* const g_version_string = PROGRAM_NAME " v" STRINGIZE(VERSION_MAJOR) "." STRINGIZE(VERSION_MINOR) "b";
#else
const char* const g_version_string = PROGRAM_NAME " v" STRINGIZE(VERSION_MAJOR) "." STRINGIZE(VERSION_MINOR);
#endif
|
||||||
|
|
||||||
|
bool parse_commandline (int parArgc, char* parArgv[], po::variables_map parVarMap) {
|
||||||
|
po::options_description desc("General");
|
||||||
|
desc.add_options()
|
||||||
|
("help,h", "Produces this help message")
|
||||||
|
("version", "Prints the program's version and quits")
|
||||||
|
;
|
||||||
|
po::options_description positional_options("Positional options");
|
||||||
|
positional_options.add_options()
|
||||||
|
("input-url", po::value<std::string>(), "Input URL")
|
||||||
|
("xpath", po::value<std::string>(), "XPath expression")
|
||||||
|
;
|
||||||
|
po::options_description all("Available options");
|
||||||
|
all.add(desc).add(positional_options);
|
||||||
|
po::positional_options_description pd;
|
||||||
|
pd.add("input-url", 1).add("xpath", 1);
|
||||||
|
po::store(po::command_line_parser(parArgc, parArgv).options(all).positional(pd).run(), parVarMap);
|
||||||
|
po::notify(parVarMap);
|
||||||
|
|
||||||
|
if (parVarMap.count("help")) {
|
||||||
|
po::options_description visible("Available options");
|
||||||
|
visible.add(desc);
|
||||||
|
std::cout << "Usage: " << PROGRAM_NAME << " [options...] <url> <xpath>\n";
|
||||||
|
std::cout << visible;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if (parVarMap.count("version")) {
|
||||||
|
std::cout << g_version_string;
|
||||||
|
std::cout << " git revision " << VERSION_GIT << "\n";
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parVarMap.count("input-url") == 0) {
|
||||||
|
std::cerr << "No input url specified, use --help for help" << std::endl;
|
||||||
|
//return 2;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (parVarMap.count("xpath") == 0) {
|
||||||
|
std::cerr << "No XPath expression specified, use --help for help" << std::endl;
|
||||||
|
//return 2;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
|
||||||
int main (int argc, char* argv[]) {
|
int main (int argc, char* argv[]) {
|
||||||
if (argc != 3) {
|
po::variables_map vm;
|
||||||
std::cerr << PROGRAM_NAME << "v" << VERSION_MAJOR << "." << VERSION_MINOR;
|
if (parse_commandline(argc, argv, vm)) {
|
||||||
#if VERSION_BETA
|
return 0;
|
||||||
std::cerr << "b";
|
|
||||||
#endif
|
|
||||||
std::cerr << " git revision " << VERSION_GIT << "\n";
|
|
||||||
std::cerr << "Default user agent is \"" << DEFAULT_USER_AGENT << "\"\n";
|
|
||||||
std::cerr << "Usage: scraper <URL> <XPath>" << std::endl;
|
|
||||||
return 2;
|
|
||||||
}
|
}
|
||||||
|
const auto url = vm["input-url"].as<std::string>();
|
||||||
const char* const& url = argv[1];
|
const auto xpath = vm["xpath"].as<std::string>();
|
||||||
const char* const& xpath = argv[2];
|
#if !defined(NDEBUG)
|
||||||
|
|
||||||
std::cout << "URL : " << url << "\n";
|
std::cout << "URL : " << url << "\n";
|
||||||
std::cout << "XPath: " << xpath << std::endl;
|
std::cout << "XPath: " << xpath << std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
std::string tidyHtml = duck::getCleanHtml(url, false, false);
|
std::string tidyHtml = duck::getCleanHtml(vm["input-url"].as<std::string>(), false, false);
|
||||||
|
|
||||||
{
|
{
|
||||||
pugi::xml_document doc;
|
pugi::xml_document doc;
|
||||||
|
@ -45,7 +97,7 @@ int main (int argc, char* argv[]) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
pugi::xpath_node_set xpathRes = doc.select_nodes(xpath);
|
pugi::xpath_node_set xpathRes = doc.select_nodes(xpath.c_str());
|
||||||
for (pugi::xpath_node_set::const_iterator itFind(xpathRes.begin()), itFindEND(xpathRes.end()); itFind != itFindEND; ++itFind) {
|
for (pugi::xpath_node_set::const_iterator itFind(xpathRes.begin()), itFindEND(xpathRes.end()); itFind != itFindEND; ++itFind) {
|
||||||
const pugi::xpath_node& node = *itFind;
|
const pugi::xpath_node& node = *itFind;
|
||||||
if (node.node()) {
|
if (node.node()) {
|
||||||
|
|
Loading…
Reference in a new issue