diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d6177c..fa50f98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,6 +119,7 @@ add_subdirectory(src/pq) add_subdirectory(src/common) add_subdirectory(src/machinery) add_subdirectory(lib/pbl) +add_subdirectory(lib/glob2regex) #Actions add_subdirectory(src/main) diff --git a/lib/glob2regex/CMakeLists.txt b/lib/glob2regex/CMakeLists.txt new file mode 100644 index 0000000..1b2fdac --- /dev/null +++ b/lib/glob2regex/CMakeLists.txt @@ -0,0 +1,30 @@ +cmake_minimum_required(VERSION 3.3 FATAL_ERROR) +project(glob2regex VERSION 0.1.0 LANGUAGES CXX) + +find_package(Boost 1.53.0 REQUIRED) + +add_library(${PROJECT_NAME} + src/glob2regex.cpp + src/glob_ast.cpp + src/render_ast.cpp +) + +target_include_directories(${PROJECT_NAME} SYSTEM + PRIVATE ${Boost_INCLUDE_DIRS} +) +target_include_directories(${PROJECT_NAME} + PRIVATE src + PUBLIC include +) + +target_compile_features(${PROJECT_NAME} + PUBLIC cxx_range_for + PUBLIC cxx_defaulted_functions + PUBLIC cxx_auto_type + PUBLIC cxx_noexcept + PUBLIC cxx_rvalue_references +) + +target_compile_definitions(${PROJECT_NAME} + PRIVATE BOOST_SPIRIT_USE_PHOENIX_V3=1 +) diff --git a/lib/glob2regex/include/glob2regex/glob2regex.hpp b/lib/glob2regex/include/glob2regex/glob2regex.hpp new file mode 100644 index 0000000..f179233 --- /dev/null +++ b/lib/glob2regex/include/glob2regex/glob2regex.hpp @@ -0,0 +1,27 @@ +/* Copyright 2015, 2016, Michele Santullo + * This file is part of "dindexer". + * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . + */ + +#ifndef id98DC1C17239B4DD38C8697EC91BC3DA4 +#define id98DC1C17239B4DD38C8697EC91BC3DA4 + +#include + +namespace g2r { + /* Returns the regular expression text generated from the glob pattern parGlob. */ std::string convert ( const std::string& parGlob ); +} //namespace g2r + +#endif diff --git a/lib/glob2regex/src/glob2regex.cpp b/lib/glob2regex/src/glob2regex.cpp new file mode 100644 index 0000000..e3d655b --- /dev/null +++ b/lib/glob2regex/src/glob2regex.cpp @@ -0,0 +1,31 @@ +/* Copyright 2015, 2016, Michele Santullo + * This file is part of "dindexer". + * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . + */ + +#include "glob2regex/glob2regex.hpp" +#include "glob_ast.hpp" +#include "render_ast.hpp" +#include + +namespace g2r { + namespace { + } //unnamed namespace + + /* Builds the glob AST with make_ast() and returns the string that render_ast() produces for it. */ std::string convert (const std::string& parGlob) { + const auto glob_ast = make_ast(parGlob); + return render_ast(glob_ast); + } +} //namespace g2r diff --git a/lib/glob2regex/src/glob_ast.cpp b/lib/glob2regex/src/glob_ast.cpp new file mode 100644 index 0000000..d2b2205 --- /dev/null +++ b/lib/glob2regex/src/glob_ast.cpp @@ -0,0 +1,108 @@ +/* Copyright 2015, 2016, Michele Santullo + * This file is part of "dindexer". 
+ * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . + */ + +#include "glob_ast.hpp" +#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +//#include +#include +#include +#include +#include +#include + +namespace qi = boost::spirit::qi; + +namespace g2r { + namespace { + /* Boost.Spirit Qi grammar for glob patterns. Its start rule accepts any sequence of group, alternation, literal and jolly (wildcard) elements. make_ast() instantiates it and runs qi::parse over the whole input string. */ template + struct GlobGrammar : qi::grammar { + GlobGrammar ( void ); + ~GlobGrammar ( void ) = default; + + qi::rule start; + qi::rule alternation; + qi::rule group; + qi::rule literal; + qi::rule>()> comma_list; + qi::rule single_char_comma_list; + qi::rule escaped_glob; + qi::rule jolly; + }; + + /* Defines the rules. alternation: a brace-enclosed, comma-separated list of start sequences. group: a bracket expression with an optional leading exclamation mark, or a braced comma list of single non-special characters with the negation attribute fixed to false. literal: one or more non-special or backslash-escaped characters. jolly: the double-star, star or question-mark token, each preceded by three constant attr() values (presumably min length, max length and match_slash, in the order GlobJolly is adapted - confirm). */ template + GlobGrammar::GlobGrammar() : + GlobGrammar::base_type(start) + { + using boost::spirit::qi::lit; + using boost::spirit::qi::char_; + using boost::spirit::qi::matches; + using boost::spirit::qi::eps; + using boost::spirit::qi::string; + using boost::spirit::qi::as_string; + using boost::spirit::qi::attr; + using boost::spirit::qi::repeat; + using boost::spirit::qi::inf; + + static const char* const special_char_list = "{}[]*\\+? 
"; + const uint16_t uint16_zero = 0; + const uint16_t uint16_one = 1; + + start = *(group | alternation | literal | jolly); + comma_list = start % ","; + single_char_comma_list = ~char_(special_char_list) % ","; + alternation = eps >> lit("{") >> comma_list >> "}"; + group = + (lit("[") >> matches[lit("!")] >> as_string[-string("]") >> *(~char_(']') | escaped_glob)] >> "]") | + (attr(false) >> lit("{") >> single_char_comma_list >> lit("}")); + literal = +(~char_(special_char_list) | escaped_glob); + escaped_glob = lit("\\") >> char_(special_char_list); + jolly = (attr(uint16_zero) >> attr(uint16_zero) >> attr(true) >> "**") | + (attr(uint16_zero) >> attr(uint16_zero) >> attr(false) >> "*") | + (attr(uint16_one) >> attr(uint16_one) >> attr(false) >> "?") + ; + } + } //unnamed namespace + + AstType make_ast (const std::string& parGlob) { + GlobGrammar gramm; + + auto glob_beg = parGlob.cbegin(); + AstType glob_ast; + const bool parse_ret = boost::spirit::qi::parse( + glob_beg, + parGlob.end(), + gramm, + glob_ast + ); + + std::cout << "make_ast() - parse_ret = "; + if (parse_ret) + std::cout << "true"; + else + std::cout << "false"; + std::cout << ", glob_ast.size() = " << glob_ast.size() << std::endl; + + return glob_ast; + } +} //namespace g2r diff --git a/lib/glob2regex/src/glob_ast.hpp b/lib/glob2regex/src/glob_ast.hpp new file mode 100644 index 0000000..7e49f8c --- /dev/null +++ b/lib/glob2regex/src/glob_ast.hpp @@ -0,0 +1,29 @@ +/* Copyright 2015, 2016, Michele Santullo + * This file is part of "dindexer". + * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . + */ + +#ifndef id4E6A756CA0A94213A90A40C567F51824 +#define id4E6A756CA0A94213A90A40C567F51824 + +#include "glob_node_structs.hpp" +#include +#include + +namespace g2r { + /* Parses the glob pattern parGlob and returns the resulting AST. NOTE(review): the definition in glob_ast.cpp prints the parse outcome and node count to std::cout and returns whatever was parsed without signalling a failed or partial parse - confirm that callers expect this. */ AstType make_ast ( const std::string& parGlob ); +} //namespace g2r + +#endif diff --git a/lib/glob2regex/src/glob_node_structs.hpp b/lib/glob2regex/src/glob_node_structs.hpp new file mode 100644 index 0000000..f9f58a4 --- /dev/null +++ b/lib/glob2regex/src/glob_node_structs.hpp @@ -0,0 +1,106 @@ +/* Copyright 2015, 2016, Michele Santullo + * This file is part of "dindexer". + * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . 
+ */ + +#ifndef id74048277752B4B85935C50F117F65DBE +#define id74048277752B4B85935C50F117F65DBE + +#include +#include +#include +#include +#include +#include +#include + +namespace g2r { + struct GlobAlternation; + + /* Character group node: the listed characters and whether the group is negated. */ struct GlobGroup { + bool negated; + std::string characters; + }; + /* Wildcard node: minimum and maximum match length plus a flag telling whether it may match a slash. NOTE(review): a maximum of 0 appears to stand for no upper bound - confirm against the renderer. */ struct GlobJolly { + GlobJolly ( void ) = default; + constexpr GlobJolly ( const GlobJolly& ) = default; + constexpr GlobJolly ( uint16_t m, uint16_t M, bool s) : + match_length_min(m), + match_length_max(M), + match_slash(s) + { + } + /* Strict ordering: match_slash first (false before true), then match_length_max, then match_length_min. */ constexpr bool operator< ( GlobJolly o ) const { + return + // slash = 0, o.slash = 1 + (not match_slash and o.match_slash) or + // slash = o.slash + ((not (match_slash xor o.match_slash)) and + // max < o.max + (((match_length_max < o.match_length_max) or + // max == o.max + (not (match_length_max xor o.match_length_max) and + // min < o.min + match_length_min < o.match_length_min)))); + } + + uint16_t match_length_min; + uint16_t match_length_max; + bool match_slash; + }; +} //namespace g2r + +BOOST_FUSION_ADAPT_STRUCT( + g2r::GlobGroup, + (bool, negated) + (std::string, characters) +); +BOOST_FUSION_ADAPT_STRUCT( + g2r::GlobJolly, + (uint16_t, match_length_min) + (uint16_t, match_length_max) + (bool, match_slash) +); + +namespace g2r { + /* Any single AST node: a recursively wrapped type (presumably GlobAlternation - confirm), a GlobGroup, a std::string or a GlobJolly. */ using GlobNode = boost::variant< + boost::recursive_wrapper, + GlobGroup, + std::string, + GlobJolly + >; + + /* Alternation node holding its list of alternatives. */ struct GlobAlternation { + std::vector> alternatives; + }; + + /* Type returned by make_ast() and consumed by render_ast(): a vector (presumably of GlobNode - confirm). */ using AstType = std::vector; +} //namespace g2r + +BOOST_FUSION_ADAPT_STRUCT( + g2r::GlobAlternation, + (std::vector>, alternatives) +); + +/* Compile-time checks of the GlobJolly ordering defined by operator<. */ static_assert(g2r::GlobJolly(1, 2, false) < g2r::GlobJolly(0, 0, true), "Wrong less than"); +static_assert(not (g2r::GlobJolly(1, 2, false) < g2r::GlobJolly(1, 2, false)), "Wrong less than"); +static_assert(g2r::GlobJolly(10, 2, false) < g2r::GlobJolly(0, 3, false), "Wrong less than"); +static_assert(g2r::GlobJolly(10, 2, true) < g2r::GlobJolly(0, 3, true), "Wrong less than"); +static_assert(not 
(g2r::GlobJolly(10, 3, false) < g2r::GlobJolly(0, 2, false)), "Wrong less than"); +static_assert(not (g2r::GlobJolly(10, 3, true) < g2r::GlobJolly(0, 2, true)), "Wrong less than"); +static_assert(g2r::GlobJolly(10, 3, false) < g2r::GlobJolly(11, 3, false), "Wrong less than"); +static_assert(g2r::GlobJolly(10, 3, true) < g2r::GlobJolly(11, 3, true), "Wrong less than"); + +#endif diff --git a/lib/glob2regex/src/render_ast.cpp b/lib/glob2regex/src/render_ast.cpp new file mode 100644 index 0000000..a80df3c --- /dev/null +++ b/lib/glob2regex/src/render_ast.cpp @@ -0,0 +1,73 @@ +/* Copyright 2015, 2016, Michele Santullo + * This file is part of "dindexer". + * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . 
+ */ + +#include "render_ast.hpp" +#include +#include +#include + +namespace ka = boost::spirit::karma; + +namespace g2r { + namespace { + /* Boost.Spirit Karma generator that writes regex text for an AST: any sequence of jolly, alternation, group and literal nodes. */ template + struct RegexGen : ka::grammar { + RegexGen ( void ); + + boost::spirit::karma::rule start; + boost::spirit::karma::rule alternation; + boost::spirit::karma::rule group; + boost::spirit::karma::rule literal; + boost::spirit::karma::rule jolly; + boost::spirit::karma::symbols negate_group; + boost::spirit::karma::symbols special_char; + boost::spirit::karma::symbols jolly_dic; + }; + + template + RegexGen::RegexGen() : + RegexGen::base_type(start) + { + using ka::lit; + using ka::char_; + using ka::eps; + using ka::symbols; + + /* Lookup tables used by the rules below: negate_group maps the negation flag to a caret or to nothing, special_char maps each regex metacharacter to its backslash-escaped form, and jolly_dic maps the three known wildcard values to their regex fragments. */ negate_group.add(true, "^")(false, ""); + special_char.add('.', "\\.")('*', "\\*")('\\', "\\\\")('?', "\\?") + ('(', "\\(")(')', "\\)")('[', "\\[")(']', "\\]")('{', "\\{") + ('}', "\\}")('$', "\\$")('^', "\\^")('+', "\\+")('|', "\\|"); + jolly_dic.add(GlobJolly(0, 0, false), "[^/]*") + (GlobJolly(1, 1, false), "[^/]") + (GlobJolly(0, 0, true), ".*"); + + start = *(jolly | alternation | group | literal); + group = "[" << negate_group << *(special_char | char_) << "]"; + alternation = eps << "(?:" << (start % '|') << ")"; + literal = *(special_char | char_); + jolly = eps << jolly_dic; + } + } //unnamed namespace + + /* Formats parAst with RegexGen into a string stream, appends a dollar sign and returns the resulting string. */ std::string render_ast (const AstType& parAst) { + RegexGen gramm; + std::ostringstream oss; + oss << ka::format(gramm, parAst); + oss << '$'; + return oss.str(); + } +} //namespace g2r diff --git a/lib/glob2regex/src/render_ast.hpp b/lib/glob2regex/src/render_ast.hpp new file mode 100644 index 0000000..1cf1def --- /dev/null +++ b/lib/glob2regex/src/render_ast.hpp @@ -0,0 +1,29 @@ +/* Copyright 2015, 2016, Michele Santullo + * This file is part of "dindexer". 
+ * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . + */ + +#ifndef id0F110FDE94C941F1A287D9A3918377FC +#define id0F110FDE94C941F1A287D9A3918377FC + +#include "glob_node_structs.hpp" +#include +#include + +namespace g2r { + /* Returns the regular expression text generated from the AST parAst. */ std::string render_ast ( const AstType& parAst ); +} //namespace g2r + +#endif diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 9bcc0bf..dfbe920 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -3,6 +3,7 @@ project(${bare_name}-test CXX) add_executable(${PROJECT_NAME} test_diriterator.cpp test_guess_content_type.cpp + test_glob2regex.cpp ) target_include_directories(${PROJECT_NAME} SYSTEM @@ -15,6 +16,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE ${bare_name}-machinery PRIVATE gtest PRIVATE gtest_main + PRIVATE glob2regex ) add_test( diff --git a/test/unit/test_glob2regex.cpp b/test/unit/test_glob2regex.cpp new file mode 100644 index 0000000..45a420c --- /dev/null +++ b/test/unit/test_glob2regex.cpp @@ -0,0 +1,29 @@ +/* Copyright 2015, 2016, Michele Santullo + * This file is part of "dindexer". + * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . + */ + +#include "glob2regex/glob2regex.hpp" +#include +#include + +/* Checks g2r::convert() on a single file-extension glob against the expected regex string. */ TEST(glob2regex, convert) { + { + const std::string glob = "*.ogg"; + const std::string expected_regex = "[^/]*\\.ogg$"; + const auto auto_regex = g2r::convert(glob); + EXPECT_EQ(expected_regex, auto_regex); + } +}