Working on a replacement for restc-cpp
This commit is contained in:
parent
440fbd52b9
commit
05c03c2daf
5 changed files with 266 additions and 0 deletions
|
@ -103,6 +103,8 @@ executable(meson.project_name(),
|
|||
'eventia/event.cpp',
|
||||
'timer_oro_api.cpp',
|
||||
'oro/originsdb.cpp',
|
||||
'oro/page_fetch.cpp',
|
||||
'oro/http_header_parse.cpp',
|
||||
project_config_file,
|
||||
install: true,
|
||||
dependencies: lib_deps,
|
||||
|
|
115
src/oro/http_header_parse.cpp
Normal file
115
src/oro/http_header_parse.cpp
Normal file
|
@ -0,0 +1,115 @@
|
|||
/* Copyright 2020, Michele Santullo
|
||||
* This file is part of orotool.
|
||||
*
|
||||
* Orotool is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Orotool is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Orotool. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "private/http_header_parse.hpp"
|
||||
#include <boost/spirit/home/x3.hpp>
|
||||
#include <boost/fusion/include/adapt_struct.hpp>
|
||||
#include <boost/fusion/include/std_pair.hpp>
|
||||
|
||||
namespace x3 = boost::spirit::x3;
|
||||
namespace ascii = boost::spirit::x3::ascii;
|
||||
|
||||
namespace boost::spirit::x3::traits {
|
||||
|
||||
template <typename Char, typename Trait>
|
||||
struct is_range<std::basic_string_view<Char, Trait>> : boost::mpl::true_ {};
|
||||
|
||||
} //namespace boost::spirit::x3::traits
|
||||
|
||||
//Exposes oro::ParsedHeader as a fusion sequence; the member order listed
//here is the order in which the http_header rule fills the members in.
BOOST_FUSION_ADAPT_STRUCT(
	oro::ParsedHeader,
	version,
	code,
	message,
	fields
);
|
||||
|
||||
namespace oro {
|
||||
namespace {
|
||||
namespace parser {
|
||||
//HTTP/1.1 200 OK
|
||||
//X-RateLimit-Remaining: 5
|
||||
//Retry-After: 86400
|
||||
//Server: soapui
|
||||
//X-RateLimit-Reset: 1592918516
|
||||
//X-RateLimit-Limit: 6
|
||||
//Date: Mon, 22 Jun 2020 13:21:55 GMT
|
||||
//Set-Cookie: __cfduid=dac1342771e458af0ce6c7b462db1e18d1592832115; expires=Wed, 22-Jul-20 13:21:55 GMT; path=/; domain=.originsro.org; HttpOnly; SameSite=Lax
|
||||
//Content-Type: application/json
|
||||
//Content-Encoding: gzip
|
||||
//Content-Length: 116629
|
||||
|
||||
using x3::lit;
|
||||
using x3::digit;
|
||||
using x3::no_skip;
|
||||
using x3::string;
|
||||
using x3::uint_;
|
||||
using x3::lexeme;
|
||||
using x3::char_;
|
||||
using x3::eol;
|
||||
|
||||
template <typename Subject>
|
||||
struct raw_directive : x3::raw_directive<Subject> {
|
||||
using x3::raw_directive<Subject>::raw_directive;
|
||||
|
||||
template <typename Iterator, typename Context, typename RContext, typename Attribute>
|
||||
bool parse(Iterator& first, Iterator const& last, Context const& context, RContext& rcontext, Attribute& attr) const {
|
||||
x3::skip_over(first, last, context);
|
||||
Iterator saved = first;
|
||||
if (this->subject.parse(first, last, context, rcontext, x3::unused)) {
|
||||
attr = { saved, typename Attribute::size_type(first - saved) };
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
struct raw_gen {
|
||||
template <typename Subject>
|
||||
raw_directive<typename x3::extension::as_parser<Subject>::value_type> operator[](Subject subject) const {
|
||||
return { x3::as_parser(std::move(subject)) };
|
||||
}
|
||||
};
|
||||
|
||||
auto const raw = raw_gen{};
|
||||
|
||||
x3::rule<class HeaderEntry, std::pair<std::string_view, std::string_view>> header_entry = "header_entry";
|
||||
auto const header_entry_def =
|
||||
raw[lexeme[+(char_ - ':')]] >> ':' >> raw[lexeme[+(char_ - eol)]]
|
||||
;
|
||||
|
||||
x3::rule<class HttpHeaderRule, ParsedHeader> http_header = "http_header";
|
||||
auto const http_header_def =
|
||||
no_skip[lit("HTTP/") >> raw[+digit >> -(string(".") >> +digit)]] > uint_ >
|
||||
raw[lexeme[+(char_ - eol)]] > eol >>
|
||||
(header_entry % eol)
|
||||
;
|
||||
|
||||
BOOST_SPIRIT_DEFINE(http_header, header_entry);
|
||||
} //namespace parser
|
||||
} //unnamed namespace
|
||||
|
||||
//Parses an HTTP response header block (status line plus header fields).
//
//text: the header block; every string_view in the returned object points
//      into this buffer, so the buffer must outlive the result.
//Returns the parsed header. A parse that doesn't match is not reported: the
//returned object then holds whatever was stored before the mismatch, on top
//of value-initialised members (code == 0, empty views, no fields).
//The grammar contains expectation points, so x3::expectation_failure can
//propagate out of this function on a malformed status line.
ParsedHeader header_parse (std::string_view text) {
	//Value-initialise: a plain default-init would leave code indeterminate
	//whenever the parser fails before reaching the status code.
	ParsedHeader retval{};

	//Skip horizontal whitespace only, line ends are significant to the grammar
	const auto skipper = x3::space - x3::eol;

	auto cursor = text.begin();
	//The success flag is deliberately dropped, see the function comment
	static_cast<void>(x3::phrase_parse(
		cursor,
		text.end(),
		parser::http_header,
		skipper,
		retval
	));
	return retval;
}
|
||||
|
||||
} //namespace oro
|
80
src/oro/page_fetch.cpp
Normal file
80
src/oro/page_fetch.cpp
Normal file
|
@ -0,0 +1,80 @@
|
|||
/* Copyright 2020, Michele Santullo
|
||||
* This file is part of orotool.
|
||||
*
|
||||
* Orotool is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Orotool is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Orotool. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "private/page_fetch.hpp"
|
||||
#include "private/http_header_parse.hpp"
|
||||
#include <curl_easy.h>
|
||||
#include <curl_pair.h>
|
||||
#include <curl_ios.h>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
|
||||
namespace oro {
|
||||
namespace {
|
||||
//Tells whether parUrl begins with the "https://" scheme prefix.
//The comparison ignores ASCII case, since URI schemes are case-insensitive
//("HTTPS://host" is the same scheme as "https://host").
//Returns false for strings shorter than the prefix, including the empty one.
bool is_https (std::string_view parUrl) {
	constexpr std::string_view protocol = "https://";
	if (parUrl.size() < protocol.size())
		return false;

	//ASCII-only folding done by hand so the result can't depend on the locale
	const auto to_lower = [](char c) -> char {
		return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
	};

	return std::equal(
		protocol.begin(),
		protocol.end(),
		parUrl.begin(),
		[&to_lower](char expected, char actual) { return expected == to_lower(actual); }
	);
}
|
||||
} //unnamed namespace
|
||||
|
||||
//Downloads url with libcurl (through curlcpp) and returns the response.
//
//url:        address to fetch; redirects are followed.
//user_agent: value sent as the User-Agent.
//Returns an HttpResponse whose raw member holds all received header text,
//a "\n", then the body; every other string_view member points into raw.
//The header fields, HTTP version and description come from the last status
//line found in the header text, which is the response that
//CURLINFO_RESPONSE_CODE refers to when redirects were followed.
//Errors raised by curlcpp or by header_parse() are not caught here.
HttpResponse page_fetch (std::string_view url, std::string_view user_agent) {
	using curl::curl_pair;

	std::ostringstream body_oss;
	std::ostringstream header_oss;
	curl::curl_ios<std::ostringstream> body(body_oss);
	curl::curl_ios<std::ostringstream> header(header_oss);
	curl::curl_easy easy;

	//No CURLOPT_HEADERFUNCTION is set: libcurl then passes header lines to
	//the write callback too, along with the CURLOPT_HEADERDATA stream.
	easy.add<CURLOPT_WRITEFUNCTION>(header.get_function());
	easy.add<CURLOPT_HEADERDATA>(header.get_stream());
	easy.add<CURLOPT_WRITEDATA>(body.get_stream());

	easy.add(curl_pair<CURLoption, std::string>(CURLOPT_URL, std::string(url)));
	easy.add<CURLOPT_FOLLOWLOCATION>(1L);
	if (is_https(url)) {
		easy.add<CURLOPT_SSL_VERIFYPEER>(true);
		//2 is the documented "verify the host name" value; 1 is not a
		//valid setting and is rejected by a range of libcurl releases.
		easy.add<CURLOPT_SSL_VERIFYHOST>(2L);
	}
	easy.add<CURLOPT_ACCEPT_ENCODING>("gzip");
	easy.add<CURLOPT_HTTP_CONTENT_DECODING>(1L);
	easy.add(curl_pair<CURLoption, std::string>(CURLOPT_USERAGENT, std::string(user_agent)));

	easy.perform();

	HttpResponse resp;
	resp.code = easy.get_info<CURLINFO_RESPONSE_CODE>().get();
	resp.raw = header_oss.str();
	resp.raw += "\n";
	const std::size_t body_start = resp.raw.size();
	resp.raw += body_oss.str();
	//raw is not modified past this point, so the views taken below stay
	//valid for as long as raw's buffer does.
	//NOTE(review): they rely on resp reaching the caller without its buffer
	//being relocated (NRVO, or a move of a heap-allocated string).
	std::string_view raw_view(resp.raw);
	resp.body = raw_view.substr(body_start);

	//With redirects the header text contains one block per response; only
	//the last one belongs to the response described by resp.code and body.
	//Header values are single-line, so "\nHTTP/" can only be a status line.
	const std::string_view all_headers = raw_view.substr(0, body_start);
	const std::size_t last_status = all_headers.rfind("\nHTTP/");
	const std::size_t header_start =
		(std::string_view::npos == last_status ? 0 : last_status + 1);

	auto parsed_header = header_parse(all_headers.substr(header_start));
	resp.header = std::move(parsed_header.fields);
	assert(resp.code == parsed_header.code);
	resp.http_ver = parsed_header.version;
	resp.code_desc = parsed_header.message;
	return resp;
}
|
||||
} //namespace oro
|
33
src/oro/private/http_header_parse.hpp
Normal file
33
src/oro/private/http_header_parse.hpp
Normal file
|
@ -0,0 +1,33 @@
|
|||
/* Copyright 2020, Michele Santullo
|
||||
* This file is part of orotool.
|
||||
*
|
||||
* Orotool is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Orotool is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Orotool. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
namespace oro {
	//Outcome of header_parse(). No text is owned here: every string_view,
	//including those inside fields, refers to the buffer that was handed to
	//header_parse(), which therefore has to outlive this object.
	struct ParsedHeader {
		//Version as it appears after "HTTP/" in the status line
		std::string_view version;
		//Numeric status code; 0 until a status line has been parsed
		unsigned int code = 0;
		//Text following the status code on the status line
		std::string_view message;
		//Header lines as (name, value) pairs, in the order they were parsed
		std::vector<std::pair<std::string_view, std::string_view>> fields;
	};

	//Parses an HTTP response header block (status line followed by header
	//lines) out of text and returns the pieces as views into text.
	ParsedHeader header_parse (std::string_view text);
} //namespace oro
|
36
src/oro/private/page_fetch.hpp
Normal file
36
src/oro/private/page_fetch.hpp
Normal file
|
@ -0,0 +1,36 @@
|
|||
/* Copyright 2020, Michele Santullo
|
||||
* This file is part of orotool.
|
||||
*
|
||||
* Orotool is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Orotool is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Orotool. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
namespace oro {
	//Response returned by page_fetch(). raw owns the received text; the
	//string_view members are meant to point into raw.
	//NOTE(review): because of that, a copy of this struct carries views that
	//still refer to the source object's raw - confirm callers never keep a
	//copy beyond the lifetime of the object it was copied from.
	struct HttpResponse {
		//Owning storage for the header text and the body
		std::string raw;
		//Header lines as (name, value) pairs
		std::vector<std::pair<std::string_view, std::string_view>> header;
		//Response body
		std::string_view body;
		//HTTP version taken from the status line
		std::string_view http_ver;
		//Textual description that follows the status code
		std::string_view code_desc;
		//Numeric status code; 0 until page_fetch() fills it in
		unsigned int code = 0;
	};

	//Fetches url, identifying as user_agent, and returns the response.
	HttpResponse page_fetch (std::string_view url, std::string_view user_agent);
} //namespace oro
|
Loading…
Reference in a new issue