Refactor higher level code into TorrentRead class

This also addresses the issue with paths of more than 1 item,
so all torrents should now work, and adds some error reporting
through exceptions.
This commit is contained in:
King_DuckZ 2025-04-07 01:01:52 +01:00
parent 3e2cfb3b37
commit 50f33300be
10 changed files with 350 additions and 47 deletions

View file

@ -3,7 +3,7 @@ project('ducktorrent', 'cpp',
meson_version: '>=0.63.0', meson_version: '>=0.63.0',
default_options: [ default_options: [
'buildtype=release', 'buildtype=release',
'cpp_std=gnu++20', 'cpp_std=gnu++23',
'b_ndebug=if-release', 'b_ndebug=if-release',
'c_std=c99', 'c_std=c99',
], ],

View file

@ -16,8 +16,7 @@
*/ */
#include "parser.hpp" #include "parser.hpp"
#include "visitors/debug_visitor.hpp" #include "torrent_read.hpp"
#include "visitors/find_t_visitor.hpp"
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
@ -28,16 +27,6 @@
#include <sha1/BufferSource.hpp> #include <sha1/BufferSource.hpp>
namespace { namespace {
std::string load_file (const std::string& path) {
std::ifstream ifs(path);
ifs >> std::noskipws;
return std::string(
std::istreambuf_iterator<char>(ifs),
std::istreambuf_iterator<char>()
);
}
SHA1::MessageDigest to_hash160_digest (const std::array<std::uint32_t, 5>& arr) { SHA1::MessageDigest to_hash160_digest (const std::array<std::uint32_t, 5>& arr) {
SHA1::MessageDigest hash_found; SHA1::MessageDigest hash_found;
std::copy_n(arr.begin(), 5, hash_found.hash); std::copy_n(arr.begin(), 5, hash_found.hash);
@ -52,33 +41,24 @@ int main(int argc, const char* argv[]) {
return 2; return 2;
} }
std::string full_torrent = load_file(argv[1]); duck::TorrentRead torrent(argv[1], "");
std::cout << "Loaded file into string of size " << full_torrent.size() << '\n'; std::cout << "Loaded file into string of size " << torrent.raw_data_size() << '\n';
std::vector<duck::TorrentValue> values = duck::parse_torrent(full_torrent);
for (const auto& value : values) { torrent.print(std::cout);
duck::DebugVisitor visitor;
boost::apply_visitor(visitor, value);
}
const auto& values = torrent.parsed_values();
auto hashes = duck::collect_hashes(values); auto hashes = duck::collect_hashes(values);
std::cout << "Got " << hashes.size() << " hashes\n"; std::cout << "Got " << hashes.size() << " hashes\n";
const auto variant_array = duck::find_variant("/info/files", values); const auto file_count = torrent.read_file_count();
if (not variant_array) {
std::cout << "No object found in torrent file at path /info/files\n";
return 1;
}
const auto file_count = static_cast<std::int_fast32_t>(duck::find_int("/[[size]]", *variant_array));
if (file_count) if (file_count)
std::cout << "Input has " << file_count << " file entries\n"; std::cout << "Input has " << file_count << " file entries\n";
else else
std::cout << "Input seems to contain a single file only\n"; std::cout << "Input seems to contain a single file only\n";
const auto piece_length = static_cast<std::size_t>(duck::find_int("/info/piece length", values)); const std::size_t piece_length = torrent.read_piece_length();
if (0 == file_count) { if (0 == file_count) {
std::string file_name = std::string{duck::find_string("/info/name", values)}; std::string file_name{torrent.read_file_name()};
std::cout << "Found file name \"" << file_name << "\"\n"; std::cout << "Found file name \"" << file_name << "\"\n";
std::ifstream istream(file_name, std::ios::in|std::ios::binary); std::ifstream istream(file_name, std::ios::in|std::ios::binary);
@ -105,16 +85,15 @@ int main(int argc, const char* argv[]) {
} }
} }
else { else {
std::string search("/[[");
std::size_t hash_index = 0; std::size_t hash_index = 0;
std::size_t match_count = 0; std::size_t match_count = 0;
std::size_t read_size = 0; std::size_t read_size = 0;
std::vector<std::uint8_t> buff; std::vector<std::uint8_t> buff;
for (std::int_fast32_t z = 0; z < file_count; ++z) { for (std::int_fast32_t z = 0; z < file_count; ++z) {
auto file_name = std::string{duck::find_string(search + std::to_string(z) + "]]/path/[[0]]", *variant_array)}; std::string curr_path = torrent.read_joint_file_path(z, '/');
std::cout << "path index " << z << '\t' << file_name << '\n'; std::cout << "path index " << z << '\t' << curr_path << '\n';
std::ifstream istream(file_name, std::ios::in|std::ios::binary); std::ifstream istream(curr_path, std::ios::in|std::ios::binary);
if (istream.is_open()) { if (istream.is_open()) {
buff.resize(piece_length); buff.resize(piece_length);
while (istream.read(reinterpret_cast<char*>(buff.data() + read_size), piece_length - read_size).gcount() > 0) { while (istream.read(reinterpret_cast<char*>(buff.data() + read_size), piece_length - read_size).gcount() > 0) {
@ -124,7 +103,7 @@ int main(int argc, const char* argv[]) {
SHA1::MessageDigest calculated_hash = SHA1::computeFromSource(buff_source); SHA1::MessageDigest calculated_hash = SHA1::computeFromSource(buff_source);
SHA1::MessageDigest stored_hash = to_hash160_digest(hashes[hash_index++]); SHA1::MessageDigest stored_hash = to_hash160_digest(hashes[hash_index++]);
std::cout << stored_hash.toHexString() << " " << std::cout << stored_hash.toHexString() << " " <<
calculated_hash.toHexString() << " " << file_name << '\n'; calculated_hash.toHexString() << " " << curr_path << '\n';
if (stored_hash == calculated_hash) if (stored_hash == calculated_hash)
++match_count; ++match_count;
read_size = 0; read_size = 0;
@ -132,7 +111,7 @@ int main(int argc, const char* argv[]) {
} }
} }
else { else {
std::cout << "Unable to open " << file_name << ", bailing out of hash verification\n"; std::cout << "Unable to open " << curr_path << ", bailing out of hash verification\n";
break; break;
} }
} }

View file

@ -2,6 +2,7 @@ executable(meson.project_name(),
'main.cpp', 'main.cpp',
'parser.cpp', 'parser.cpp',
'split.cpp', 'split.cpp',
'torrent_read.cpp',
'visitors/debug_visitor.cpp', 'visitors/debug_visitor.cpp',
'visitors/find_t_visitor.cpp', 'visitors/find_t_visitor.cpp',
dependencies: [ dependencies: [

View file

@ -27,8 +27,11 @@
#if !defined(NDEBUG) #if !defined(NDEBUG)
# include <iostream> # include <iostream>
#endif #endif
#include <ciso646>
#include <stdexcept> #include <stdexcept>
#include <cstddef> #include <cstddef>
#include <ios>
#include <sstream>
namespace boost::spirit::x3::traits { namespace boost::spirit::x3::traits {
template <typename Char, typename Trait> template <typename Char, typename Trait>
@ -133,6 +136,10 @@ BOOST_SPIRIT_DEFINE(torrent_int, torrent_string, torrent_list, torrent_dict, tor
} //namespace parser } //namespace parser
} //unnamed namespace } //unnamed namespace
ParseError::ParseError (const std::string& desc) :
std::runtime_error("Parse error: " + desc)
{ }
std::vector<TorrentValue> parse_torrent (std::string_view binary_data) { std::vector<TorrentValue> parse_torrent (std::string_view binary_data) {
using boost::spirit::x3::parse; using boost::spirit::x3::parse;
@ -146,6 +153,14 @@ std::vector<TorrentValue> parse_torrent (std::string_view binary_data) {
std::cout << "done, r=" << r << " (" << begin-binary_data.data() << ")\n"; std::cout << "done, r=" << r << " (" << begin-binary_data.data() << ")\n";
#endif #endif
if (not r or begin != end) {
std::ostringstream oss;
oss << "error parsing raw input data at position " <<
(begin - binary_data.data()) << ", parser function returned " <<
std::boolalpha << r;
throw ParseError(oss.str());
}
return retval; return retval;
} }

View file

@ -23,11 +23,18 @@
#include <vector> #include <vector>
#include <array> #include <array>
#include <cstdint> #include <cstdint>
#include <stdexcept>
namespace duck { namespace duck {
typedef std::string_view TorrentStringType; typedef std::string_view TorrentStringType;
typedef signed long long int TorrentIntType; typedef signed long long int TorrentIntType;
class ParseError : public std::runtime_error {
public:
ParseError (const std::string& desc);
~ParseError() noexcept = default;
};
struct TorrentValue : public boost::spirit::x3::variant< struct TorrentValue : public boost::spirit::x3::variant<
TorrentIntType, TorrentIntType,
TorrentStringType, TorrentStringType,

225
src/torrent_read.cpp Normal file
View file

@ -0,0 +1,225 @@
/* Copyright 2025, Michele "King_DuckZ" Santullo
* This file is part of ducktorrent.
*
* Ducktorrent is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Ducktorrent is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ducktorrent. If not, see <http://www.gnu.org/licenses/>.
*/
#include "torrent_read.hpp"
#include "parser.hpp"
#include "visitors/debug_visitor.hpp"
#include "visitors/find_t_visitor.hpp"
#include <utility>
#include <fstream>
#include <system_error>
#include <boost/variant/apply_visitor.hpp>
#include <ciso646>
#include <cassert>
#include <algorithm>
#include <ranges>
#include <numeric>
namespace duck {
namespace {
//Reads the whole file at `path` into a string, byte for byte.
//Throws std::filesystem::filesystem_error if the file cannot be opened.
std::string load_file (const std::filesystem::path& path) {
    //Torrent files are bencoded binary data: open the stream in binary mode
    //so no newline translation is applied on platforms that distinguish text
    //streams from binary ones.
    std::ifstream ifs(path, std::ios::in | std::ios::binary);
    if (not ifs.is_open()) {
        throw std::filesystem::filesystem_error(
            "Error reading " + path.string(),
            std::make_error_code(std::errc::no_such_file_or_directory)
        );
    }

    //istreambuf_iterator performs unformatted reads, so whitespace is kept
    //as it is without having to set std::noskipws on the stream
    return std::string(
        std::istreambuf_iterator<char>(ifs),
        std::istreambuf_iterator<char>()
    );
}
//Builds the search string "<prefix><index><postfix>" and returns a view on it.
//
//prefix, postfix: null-terminated character arrays (string literals); the
//                 terminator is not copied into the output.
//index:           number that gets written in decimal between the two.
//out_buff:        caller-provided storage used when the result fits in it;
//                 no terminator is written.
//emergency_buff:  heap fallback used only when the result is larger than
//                 out_buff. Any previous content is discarded.
//
//The returned view points either into out_buff or into emergency_buff, so it
//is only valid until the buffer it refers to is modified or destroyed.
template <std::size_t OutS, std::size_t PrefS, std::size_t PostS>
std::string_view build_fast_search_path (
    const char (&prefix)[PrefS],
    const char (&postfix)[PostS],
    std::size_t index,
    char (&out_buff)[OutS],
    std::string& emergency_buff
) {
    static_assert(PrefS >= 1 and PostS >= 1, "Expected null-terminated inputs");
    constexpr std::size_t prefix_size = PrefS - 1;
    constexpr std::size_t postfix_size = PostS - 1;
    constexpr std::size_t output_max_size = OutS;

    const std::string num = std::to_string(index); //short string optimisation should prevent allocation
    const std::size_t real_out_size = prefix_size + num.size() + postfix_size;

    if (real_out_size <= output_max_size) {
        //Avoid allocations, just because
        char* cursor = std::copy_n(prefix, prefix_size, out_buff);
        cursor = std::copy_n(num.data(), num.size(), cursor);
        std::copy_n(postfix, postfix_size, cursor);
        return std::string_view(out_buff, real_out_size);
    }

    //too long, I yield, allocate away!!
    //Callers reuse the same emergency buffer across several calls: start
    //from an empty string, otherwise each call would append its path to the
    //one built by the previous call.
    emergency_buff.clear();
    emergency_buff.reserve(real_out_size);
    emergency_buff.append(prefix, prefix_size);
    emergency_buff += num;
    emergency_buff.append(postfix, postfix_size);
    return emergency_buff;
}
} //unnamed namespace
//Loads the file at torrent_path and parses it; workdir is only stored.
//load_file() throws std::filesystem::filesystem_error when the file cannot be
//opened and parse_torrent() throws ParseError on malformed input, so either
//exception can propagate out of this constructor.
//The initialisers depend on each other (m_torrent_path -> m_raw_torrent ->
//m_parsed_values): members are initialised in declaration order, so the
//order of the data members in the class must keep matching this list.
//NOTE(review): the parsed values presumably hold string views into
//m_raw_torrent, which would be why the raw data is kept alive as a member -
//confirm against the parser.
TorrentRead::TorrentRead (std::string_view torrent_path, std::filesystem::path workdir) :
m_workdir(std::move(workdir)),
m_torrent_path(torrent_path),
m_raw_torrent(load_file(m_torrent_path)),
m_parsed_values(parse_torrent(m_raw_torrent)),
//filled in lazily by cached_info_files()
m_cached_info_files(nullptr)
{
}
//Defaulted here rather than in the header: torrent_read.hpp only
//forward-declares TorrentValue, while this file includes parser.hpp.
//NOTE(review): presumably done so the std::vector<TorrentValue> member is
//destroyed where the full type is visible - confirm.
TorrentRead::~TorrentRead() noexcept = default;
void TorrentRead::print (std::ostream& out) const {
DebugVisitor visitor(out);
for (const TorrentValue& value : m_parsed_values) {
boost::apply_visitor(visitor, value);
}
}
//Size, in bytes, of the raw torrent data loaded by the constructor.
std::size_t TorrentRead::raw_data_size() const {
    const std::size_t byte_count = m_raw_torrent.size();
    return byte_count;
}
const std::vector<TorrentValue>& TorrentRead::parsed_values() const {
return m_parsed_values;
}
//Looks up the string stored at "/info/name" in the parsed torrent.
std::string_view TorrentRead::read_file_name() const {
    constexpr std::string_view name_key{"/info/name"};
    return find_string(name_key, m_parsed_values);
}
//Looks up the integer stored at "/info/piece length" and returns it
//converted to std::size_t.
std::size_t TorrentRead::read_piece_length() const {
    constexpr std::string_view piece_length_key{"/info/piece length"};
    return find_int<std::size_t>(piece_length_key, m_parsed_values);
}
std::int_fast32_t TorrentRead::read_file_count() const {
return find_int<std::int_fast32_t>("/[[size]]", cached_info_files());
}
std::vector<std::string_view> TorrentRead::read_file_path(std::size_t index) const {
const TorrentValue* path_variant;
std::string buff2;
{
constexpr char prefix[] = "/[[";
constexpr char postfix[] = "]]/path";
constexpr std::size_t prefix_size = sizeof(prefix) / sizeof(prefix[0]) - 1u;
constexpr std::size_t postfix_size = sizeof(postfix) / sizeof(postfix[0]) - 1u;
//see comment in read_file_size()
//in this case a search should look like "/[[1234]]/path"
char buff[4 + prefix_size + postfix_size];
auto found = find_variant(
build_fast_search_path(prefix, postfix, index, buff, buff2),
cached_info_files()
);
if (not found)
return {};
path_variant = &found->get();
}
const auto path_size = find_int<std::int_fast32_t>("/[[size]]", *path_variant);
std::vector<std::string_view> retval;
retval.reserve(path_size);
{
constexpr char prefix[] = "/[[";
constexpr char postfix[] = "]]";
constexpr std::size_t prefix_size = sizeof(prefix) / sizeof(prefix[0]) - 1u;
constexpr std::size_t postfix_size = sizeof(postfix) / sizeof(postfix[0]) - 1u;
char buff[4 + prefix_size + postfix_size];
for (std::int_fast32_t z = 0; z < path_size; ++z) {
std::string_view piece = find_string(
build_fast_search_path(prefix, postfix, z, buff, buff2),
*path_variant
);
retval.push_back(piece);
}
}
return retval;
}
//Returns the path of the file entry at `index` as a single string, with the
//components returned by read_file_path() joined by `sep`.
//The result is an empty string when read_file_path() returns no components.
std::string TorrentRead::read_joint_file_path(std::size_t index, char sep) const {
    auto pieces = this->read_file_path(index);

    //std::accumulate keeps its running total in the type of the initial
    //value, so that has to be a std::size_t: seeding it with 0u would
    //truncate the sum to unsigned int at every step.
    const std::size_t out_size = std::accumulate(pieces.cbegin(), pieces.cend(), std::size_t{0},
        [](std::size_t a, const std::string_view& b) {return a+b.size();}
    );

    std::string retval;
    //one separator between each pair of components, none for 0 or 1 pieces
    retval.reserve(out_size + std::max<std::size_t>(pieces.size(), 1u) - 1u);
    std::ranges::copy(pieces | std::views::join_with(sep), std::back_inserter(retval));
    return retval;
}
//Returns the integer stored at "length" for the file entry at `index` under
//"/info/files", converted to std::size_t.
//Throws std::runtime_error (from cached_info_files()) when the torrent has
//no "/info/files" node.
std::size_t TorrentRead::read_file_size (std::size_t index) const {
    constexpr char head[] = "/[[";
    constexpr char tail[] = "]]/length";

    //The stack buffer accommodates search strings with indices of up to 4
    //digits and no terminator, ie: "/[[1234]]/length", which makes it 16
    //chars with the current prefix and postfix. Anything longer goes to the
    //heap through `spill`.
    char stack_buff[4 + (sizeof(head) - 1u) + (sizeof(tail) - 1u)];
    std::string spill;

    const std::string_view query = build_fast_search_path(head, tail, index, stack_buff, spill);
    return find_int<std::size_t>(query, cached_info_files());
}
//Returns the "/info/files" node of the parsed torrent. The node is searched
//for on the first call only and its address is remembered in
//m_cached_info_files for the following ones.
//Throws std::runtime_error if the node is not present; nothing is cached in
//that case, so a later call searches again.
const TorrentValue& TorrentRead::cached_info_files() const {
    constexpr char info_files[] = "/info/files";

    //already looked up by a previous call
    if (m_cached_info_files)
        return *m_cached_info_files;

    if (auto found = find_variant(info_files, m_parsed_values)) {
        m_cached_info_files = &found->get();
        assert(m_cached_info_files);
        return *m_cached_info_files;
    }

    throw std::runtime_error(
        std::string{"Node \""} + info_files + "\" not found in " + m_torrent_path.string()
    );
}
} //namespace duck

63
src/torrent_read.hpp Normal file
View file

@ -0,0 +1,63 @@
/* Copyright 2025, Michele "King_DuckZ" Santullo
* This file is part of ducktorrent.
*
* Ducktorrent is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Ducktorrent is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ducktorrent. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
//This file contains higher level helpers that build on top of the parser and
//the visitors in parser.hpp and visitors/find_t_visitor.hpp. It is just here
//for convenience and it is not strictly required to read and parse bencoded
//data.
#include <vector>
#include <string>
#include <string_view>
#include <filesystem>
#include <ostream>
#include <cstddef>
#include <cstdint>
namespace duck {
    struct TorrentValue;

    //Loads a torrent file, parses it and offers accessors for the values
    //that are most commonly needed from it.
    //Objects of this class are neither copyable nor movable.
    class TorrentRead {
    public:
        //Loads and parses the file at torrent_path; workdir is stored as it
        //is. Exceptions thrown while loading or parsing propagate to the
        //caller.
        TorrentRead (std::string_view torrent_path, std::filesystem::path workdir);
        TorrentRead (const TorrentRead&) = delete;
        //m_cached_info_files is a raw pointer to a node reachable from
        //m_parsed_values, so a memberwise assignment would leave it pointing
        //into the other object. Copy assignment is disabled just like copy
        //construction.
        TorrentRead& operator= (const TorrentRead&) = delete;
        ~TorrentRead() noexcept;

        //Writes a dump of the parsed values to out
        void print (std::ostream& out) const;
        //Size in bytes of the raw torrent data
        std::size_t raw_data_size() const;
        //The parsed values; the reference is valid as long as this object is
        const std::vector<TorrentValue>& parsed_values() const;

        //String found at "/info/name"
        std::string_view read_file_name() const;
        //Integer found at "/info/piece length"
        std::size_t read_piece_length() const;
        //Size of the "/info/files" node; throws std::runtime_error if the
        //torrent has no such node
        std::int_fast32_t read_file_count() const;
        //Path components of the file entry at index, empty if the entry has
        //no path
        std::vector<std::string_view> read_file_path(std::size_t index) const;
        //Same as read_file_path() but joined into one string, with sep
        //between each pair of components
        std::string read_joint_file_path(std::size_t index, char sep) const;
        //Integer found at "length" for the file entry at index
        std::size_t read_file_size(std::size_t index) const;

    private:
        //Finds the "/info/files" node, remembering it for following calls
        const TorrentValue& cached_info_files() const;

        //The constructor initialises these members in this order and each
        //one is built from the one before it: keep the declaration order.
        std::filesystem::path m_workdir;
        std::filesystem::path m_torrent_path;
        std::string m_raw_torrent;
        std::vector<TorrentValue> m_parsed_values;
        //set lazily by cached_info_files(), hence mutable
        mutable const TorrentValue* m_cached_info_files;
    };
} //namespace duck

View file

@ -34,7 +34,15 @@ bool is_printable(std::string_view s) {
} //unnamed namespace } //unnamed namespace
DebugVisitor::DebugVisitor(int depth, bool skip_first) : DebugVisitor::DebugVisitor(int depth, bool skip_first) :
m_depth(depth), m_first_line(skip_first), m_skip_first(skip_first) {} DebugVisitor(std::cout, depth, skip_first)
{ }
DebugVisitor::DebugVisitor(std::ostream& out, int depth, bool skip_first) :
m_out(out),
m_depth(depth),
m_first_line(skip_first),
m_skip_first(skip_first)
{}
void DebugVisitor::reset() { void DebugVisitor::reset() {
m_first_line = m_skip_first; m_first_line = m_skip_first;
@ -42,7 +50,7 @@ void DebugVisitor::reset() {
void DebugVisitor::operator() (TorrentIntType value) { void DebugVisitor::operator() (TorrentIntType value) {
print_tabs(); print_tabs();
std::cout << value << '\n'; m_out << value << '\n';
} }
void DebugVisitor::operator() (const TorrentStringType& value) { void DebugVisitor::operator() (const TorrentStringType& value) {
@ -62,12 +70,12 @@ void DebugVisitor::operator() (const TorrentStringType& value) {
str = "<binary_data>"; str = "<binary_data>";
} }
std::cout << "\"" << str << ellip << "\"[" << value.size() << "]\n"; m_out << "\"" << str << ellip << "\"[" << value.size() << "]\n";
} }
void DebugVisitor::operator() (const std::vector<TorrentValue>& value) { void DebugVisitor::operator() (const std::vector<TorrentValue>& value) {
print_tabs(); print_tabs();
std::cout << "[\n"; m_out << "[\n";
DebugVisitor new_visitor(m_depth+1, false); DebugVisitor new_visitor(m_depth+1, false);
for (const auto& item : value) { for (const auto& item : value) {
@ -75,28 +83,28 @@ void DebugVisitor::operator() (const std::vector<TorrentValue>& value) {
} }
print_tabs(); print_tabs();
std::cout << "]\n"; m_out << "]\n";
} }
void DebugVisitor::operator() (const std::map<TorrentStringType, TorrentValue>& value) { void DebugVisitor::operator() (const std::map<TorrentStringType, TorrentValue>& value) {
print_tabs(); print_tabs();
std::cout << "{\n"; m_out << "{\n";
DebugVisitor new_visitor(m_depth+1); DebugVisitor new_visitor(m_depth+1);
for (const auto& item : value) { for (const auto& item : value) {
print_tabs(1); print_tabs(1);
std::cout << item.first << " => "; m_out << item.first << " => ";
new_visitor.reset(); new_visitor.reset();
boost::apply_visitor(new_visitor, item.second); boost::apply_visitor(new_visitor, item.second);
} }
print_tabs(); print_tabs();
std::cout << "}\n"; m_out << "}\n";
} }
void DebugVisitor::print_tabs(int extra) { void DebugVisitor::print_tabs(int extra) {
if (!m_first_line) { if (!m_first_line) {
for (int z = 0; z < m_depth+extra; ++z) std::cout << '\t'; for (int z = 0; z < m_depth+extra; ++z) m_out << '\t';
} }
else { else {
m_first_line = false; m_first_line = false;

View file

@ -20,11 +20,13 @@
#include "../parser.hpp" #include "../parser.hpp"
#include <cstddef> #include <cstddef>
#include <ostream>
namespace duck { namespace duck {
class DebugVisitor { class DebugVisitor {
public: public:
DebugVisitor(int depth=0, bool skip_first=true); DebugVisitor(int depth=0, bool skip_first=true);
DebugVisitor(std::ostream& out, int depth=0, bool skip_first=true);
void reset(); void reset();
void operator() (TorrentIntType value); void operator() (TorrentIntType value);
@ -35,6 +37,7 @@ public:
private: private:
void print_tabs(int extra=0); void print_tabs(int extra=0);
std::ostream& m_out;
int m_depth; int m_depth;
bool m_first_line; bool m_first_line;
bool m_skip_first; bool m_skip_first;

View file

@ -82,8 +82,9 @@ inline auto find_item(std::string_view path, const TorrentValue& value) {
return boost::apply_visitor(visitor, value); return boost::apply_visitor(visitor, value);
} }
inline TorrentIntType find_int(std::string_view path, const std::vector<TorrentValue>& values) { template <typename T=TorrentIntType>
return find_item<TorrentIntType>(path, values); inline T find_int(std::string_view path, const std::vector<TorrentValue>& values) {
return static_cast<T>(find_item<TorrentIntType>(path, values));
} }
inline TorrentStringType find_string(std::string_view path, const std::vector<TorrentValue>& values) { inline TorrentStringType find_string(std::string_view path, const std::vector<TorrentValue>& values) {
@ -94,8 +95,9 @@ inline auto find_variant(std::string_view path, const std::vector<TorrentValue>&
return find_item<TorrentValue>(path, values); return find_item<TorrentValue>(path, values);
} }
template <typename T=TorrentIntType>
inline TorrentIntType find_int(std::string_view path, const TorrentValue& value) { inline TorrentIntType find_int(std::string_view path, const TorrentValue& value) {
return find_item<TorrentIntType>(path, value); return static_cast<T>(find_item<TorrentIntType>(path, value));
} }
inline TorrentStringType find_string(std::string_view path, const TorrentValue& value) { inline TorrentStringType find_string(std::string_view path, const TorrentValue& value) {