commit 3dcfcc7f0c731c228526087769cfb355445603c4 Author: King_DuckZ Date: Wed Apr 2 20:41:48 2025 +0100 Working proof of concept diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..4fa2fc9 --- /dev/null +++ b/meson.build @@ -0,0 +1,17 @@ +project('ducktorrent', 'cpp', + version: '0.1.0', + meson_version: '>=0.56.0', + default_options: [ + 'buildtype=release', + 'cpp_std=gnu++20', + 'b_ndebug=if-release', + 'c_std=c99', + ], +) + +boost_dep = dependency('boost', version: '>=1.75.0') + +libstriezel_proj = subproject('libstriezel') +libstriezel_dep = libstriezel_proj.get_variable('libstriezel_dep') + +subdir('src') diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..355b957 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,213 @@ +#include "parser.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { +constexpr char g_test[] = "d8:announce75:http://tracker.bakabt.me:2710/f53895da71a9546b3b14682e63cb9069/announce.php7:comment21:http://www.bakabt.me/10:created by37:ruTorrent (PHP Class - Adrien Gibrat)13:creation datei1394860194ee"; /* Sample torrent metadata string; nothing in the visible code refers to it. */ + /* Returns the full contents of the file at path as a string. The stream is not opened with std::ios::binary and open failure is not checked. NOTE(review): an unreadable path presumably yields an empty string - confirm. */ +std::string load_file (const std::string& path) { + std::ifstream ifs(path); + ifs >> std::noskipws; + + return std::string( + std::istreambuf_iterator(ifs), + std::istreambuf_iterator() + ); +} + /* True when every character of s passes std::isprint, which includes the empty view. */ +bool is_printable(std::string_view s) { + return std::find_if(s.cbegin(), s.cend(), + [](unsigned char c){ return not std::isprint(c); } + ) == s.cend(); +} + /* Visitor that dumps a parsed value tree to std::cout with tab indentation. depth is the tab count, first_line suppresses the tabs of the next printed line, skip_first is the value reset() restores into first_line. */ +struct PrintVisitor { + PrintVisitor(int depth=0, bool skip_first=true) : + depth(depth), first_line(skip_first), skip_first(skip_first) {} + /* Writes depth+extra tabs, or just clears first_line when it is set. */ + void print_tabs(int extra=0) { + if (!first_line) { + for (int z = 0; z < depth+extra; ++z) std::cout << '\t'; + } + else { + first_line = false; + } + } + /* Re-arms first_line from skip_first. */ + void reset() { + first_line = skip_first; + } + /* Integer: tabs, value, newline. */ + void operator() (long long value) { + print_tabs(); + std::cout << value << '\n'; + } + /* String: printed quoted and followed by its size in brackets. Printable strings longer than max_size + 3 are cut to max_size characters plus an ellipsis; for non-printable strings the text is replaced by the literal assigned in the else branch. */ + void operator() (const
duck::TorrentStringType& value) { + constexpr std::size_t max_size = 41; + + print_tabs(); + + std::string_view str{value}; + std::string_view ellip; + if (is_printable(value)) { + if (value.size() > max_size + 3) { + str = str.substr(0, max_size); + ellip = "..."; + } + } + else { + str = ""; + } + + std::cout << "\"" << str << ellip << "\"[" << value.size() << "]\n"; + } + /* List: brackets on their own lines; items go through a child visitor one level deeper that indents every line. */ + void operator() (const std::vector& value) { + print_tabs(); + std::cout << "[\n"; + + PrintVisitor new_visitor(depth+1, false); + for (const auto& item : value) { + boost::apply_visitor(new_visitor, item); + } + + print_tabs(); + std::cout << "]\n"; + } + /* Dictionary: every key is written at depth+1 followed by => and then its value; the child visitor is reset() before each value so the value starts on the key line. */ + void operator() (const std::map& value) { + print_tabs(); + std::cout << "{\n"; + + PrintVisitor new_visitor(depth+1); + for (const auto& item : value) { + print_tabs(1); + std::cout << item.first << " => "; + + new_visitor.reset(); + boost::apply_visitor(new_visitor, item.second); + } + print_tabs(); + std::cout << "}\n"; + } + + int depth; + bool first_line; + bool skip_first; +}; + /* Visitor that looks for the pieces string. A dictionary is scanned for an info key, whose value is visited recursively, or a pieces key, whose string value is returned; everything else gives an empty view. NOTE(review): this_is_pieces is never cleared once set, so an instance looks single-use - confirm. */ +struct FindHashesVisitor : boost::static_visitor { + std::string_view operator() (long long value) { + return {}; + } + + std::string_view operator() (const duck::TorrentStringType& value) { + if (this_is_pieces) + return value; + else + return {}; + } + + std::string_view operator() (const std::vector& value) { + return {}; + } + + std::string_view operator() (const std::map& value) { + if (this_is_pieces) + return {}; + + for (const auto& item : value) { + if (item.first == "info") { + return boost::apply_visitor(*this, item.second); + } + else if (item.first == "pieces") { + this_is_pieces = true; + return boost::apply_visitor(*this, item.second); + } + } + return {}; + } + + bool this_is_pieces{false}; +}; + /* Splits the pieces blob into hash_size-byte entries, copying the bytes of every sizeof(std::uint32_t) group in reverse order into the output element. Returns an empty vector for empty input and throws std::runtime_error when the size is not a multiple of hash_size. */ +std::vector> collect_hashes (std::string_view hashes) { + typedef std::array array_type; + + if (hashes.empty()) + return {}; + + constexpr std::size_t hash_size = 20; + constexpr std::size_t uint_count = std::tuple_size(); +
static_assert(sizeof(array_type::value_type) * uint_count == hash_size); + + if (hashes.size() % hash_size != 0) + throw std::runtime_error("Bad pieces array size " + std::to_string(hashes.size())); + + std::vector retval(hashes.size() / hash_size); + const char* src = hashes.data(); + for (std::size_t z = 0; z < retval.size(); ++z, src+=hash_size) { + //std::copy_n(src, hash_size, reinterpret_cast(retval[z].data())); + for (std::size_t n = 0; n < uint_count; ++n) { + char* const out_uint = reinterpret_cast(retval[z].data() + n); + for (std::size_t u = 0; u < sizeof(std::uint32_t); ++u) { + out_uint[u] = src[n * sizeof(std::uint32_t) + sizeof(std::uint32_t) - u - 1]; + } + } + } + return retval; +} +} //unnamed namespace + /* Entry point. Requires exactly one argument and returns 2 otherwise. Loads and parses the file, dumps each parsed top-level value and, when exactly one was parsed, extracts the piece hashes and prints the first one. Finally prints the digest SHA1::computeFromSource gives for the same file. */ +int main(int argc, const char* argv[]) { + using duck::parse_torrent; + + if (argc != 2) { + std::cerr << "Wrong number of parameters. Usage:\n" + << argv[0] << " \n"; + return 2; + } + + std::string full_torrent = load_file(argv[1]); + std::cout << "Loaded file into string of size " << full_torrent.size() << '\n'; + std::vector values = parse_torrent(full_torrent); + + for (const auto& value : values) { + PrintVisitor visitor; + boost::apply_visitor(visitor, value); + } + + if (values.size() == 1) { + FindHashesVisitor visitor; + auto source_hashes = boost::apply_visitor(visitor, values.front()); + std::cout << "Got source_hashes with size " << source_hashes.size() << '\n'; + auto hashes = collect_hashes(source_hashes); + std::cout << "Got " << hashes.size() << " hashes\n"; + + if (not hashes.empty()) { + SHA1::MessageDigest hash_found; + std::copy_n(hashes.front().begin(), 5, hash_found.hash); + std::cout << hash_found.toHexString() << '\n'; + } + } + + { + SHA1::MessageDigest hash160; + SHA1::FileSource file_source; + const char* const hash_path = argv[1]; + file_source.open(hash_path); + hash160 = SHA1::computeFromSource(file_source); + std::cout << hash160.toHexString() << " " << hash_path << '\n'; + } +} diff --git a/src/meson.build
b/src/meson.build new file mode 100644 index 0000000..e611be6 --- /dev/null +++ b/src/meson.build @@ -0,0 +1,9 @@ +executable(meson.project_name(), + 'main.cpp', + 'parser.cpp', + dependencies: [ + boost_dep, + libstriezel_dep, + ], + install: true, +) diff --git a/src/parser.cpp b/src/parser.cpp new file mode 100644 index 0000000..65443b1 --- /dev/null +++ b/src/parser.cpp @@ -0,0 +1,131 @@ +#if !defined(NDEBUG) +//# define BOOST_SPIRIT_X3_DEBUG +#endif + +#include "parser.hpp" + +#include +#include +#if !defined(NDEBUG) +# include +#endif + /* Marks a type as a range for x3 through the is_range trait; the template arguments are missing from this copy of the patch. */ +namespace boost::spirit::x3::traits { +template +struct is_range> : boost::mpl::true_ {}; +} //namespace boost::spirit::x3::traits + +namespace duck { +namespace x3 = boost::spirit::x3; + +#if defined(BOOST_SPIRIT_X3_DEBUG) +std::ostream& operator<<(std::ostream& os, const std::vector& ot) { + os << "'; + return os; +} + +std::ostream& operator<<(std::ostream& os, const std::map& ot) { + os << "'; + return os; +} +#endif + /* raw_directive below derives from x3::raw_directive and replaces parse(): it skips via x3::skip_over, parses the subject with an unused attribute and, on success, assigns attr from the start iterator and the consumed length. */ +namespace workaround { +template +struct raw_directive : x3::raw_directive { + using x3::raw_directive::raw_directive; + + template + bool parse( + Iterator& first, + Iterator const& last, + Context const& context, + RContext& rcontext, + Attribute& attr + ) const { + x3::skip_over(first, last, context); + Iterator saved = first; + if (this->subject.parse(first, last, context, rcontext, x3::unused)) { + attr = { saved, typename Attribute::size_type(first - saved) }; + return true; + } + return false; + } +}; + /* Lets raw[subject] build the raw_directive above. */ +struct raw_gen { + template + raw_directive::value_type> + operator[](Subject subject) const { + return { x3::as_parser(std::move(subject)) }; + } +}; + +auto const raw = raw_gen{}; + /* as[expr] assigns expr to a freshly created x3::rule; the rule template arguments are missing from this copy of the patch. */ +template +struct as_type { + template + constexpr auto operator[](E e) const { return x3::rule {} = e; } +}; + +template +static inline constexpr as_type as; +} //namespace workaround + +namespace { +namespace parser { + +using x3::lit; +using x3::char_; +using x3::uint_; +using x3::string; +using x3::eps; +using
x3::omit; +using workaround::raw; +using workaround::as; +//using x3::ascii::digit; + +class len_tag; //see https://github.com/boostorg/spirit/issues/680 + +const x3::rule> torrent = "torrent"; +const x3::rule torrent_value = "torrent_value"; +const x3::rule torrent_int = "torrent_int"; +const x3::rule torrent_string = "torrent_string"; +const x3::rule> torrent_list = "torrent_list"; +const x3::rule> torrent_dict = "torrent_dict"; + /* A value is an integer, a string, a list or a dictionary. */ +auto torrent_value_def = torrent_int | torrent_string | torrent_list | torrent_dict; + +//auto torrent_int_def = lit('i') >> (string("0") | (char_("1-9") >> *digit)) >> lit('e'); +auto torrent_int_def = lit('i') >> x3::long_long >> lit('e'); + /* String: the uint_ length prefix is stored in the with-context, a colon follows, then raw[] consumes characters for as long as the post-decremented counter is above zero. */ +auto torrent_string_def = x3::with(0u)[ + omit[uint_[([](auto& ctx) { x3::get(ctx) = x3::_attr(ctx); })] >> ':'] >> raw[*( + eps[([](auto& ctx) { x3::_pass(ctx) = x3::get(ctx)-- > 0; })] >> as[char_] + )] +]; +auto torrent_list_def = lit('l') >> *torrent_value >> 'e'; +auto torrent_dict_def = lit('d') >> *(torrent_string >> torrent_value) >> lit('e'); +auto torrent_def = +torrent_value; + +BOOST_SPIRIT_DEFINE(torrent_int, torrent_string, torrent_list, torrent_dict, torrent_value, torrent); +} //namespace parser +} //unnamed namespace + /* Runs the torrent rule over binary_data and returns the values collected in retval. The parse result r is only printed, and only when NDEBUG is not defined; retval is returned whatever r is. NOTE(review): TorrentStringType is std::string_view, so returned strings presumably point into binary_data - confirm callers keep that buffer alive. */ +std::vector parse_torrent (std::string_view binary_data) { + using boost::spirit::x3::parse; + + const char* begin = binary_data.data(); + const char* const end = binary_data.data() + binary_data.size(); + std::vector retval; + + const bool r = parse(begin, end, parser::torrent, retval); + +#if !defined(NDEBUG) + std::cout << "done, r=" << r << " (" << begin-binary_data.data() << ")\n"; +#endif + + return retval; +} +} //namespace duck diff --git a/src/parser.hpp b/src/parser.hpp new file mode 100644 index 0000000..5bfd91c --- /dev/null +++ b/src/parser.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include +#include + +namespace duck { +typedef std::string_view TorrentStringType; + /* Variant holding a long long, a TorrentStringType, or one of two forward_ast-wrapped containers (a std::vector and a std::map whose template arguments are missing from this copy of the patch). */ +struct TorrentValue : public boost::spirit::x3::variant< + long
long, + TorrentStringType, + boost::spirit::x3::forward_ast< std::vector >, + boost::spirit::x3::forward_ast< std::map > +> { + using base_type::base_type; + using base_type::operator=; +}; + /* Parses binary_data and returns the top-level values; defined in parser.cpp. */ +std::vector parse_torrent (std::string_view binary_data); + +} //namespace duck diff --git a/subprojects/libstriezel b/subprojects/libstriezel new file mode 160000 index 0000000..12869dd --- /dev/null +++ b/subprojects/libstriezel @@ -0,0 +1 @@ +Subproject commit 12869dd16fbad4bc60eb264be13cf9f9b6b089ba