1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-25 00:53:43 +00:00

Import glob2regex

AST optimization is still missing and some code cleanup is also
needed, but it should work in the general case.
Also add a small unit test for it.
This commit is contained in:
King_DuckZ 2016-05-11 20:36:23 +02:00
parent cfcbcce9e9
commit 05aaaebe0d
11 changed files with 465 additions and 0 deletions

View file

@ -119,6 +119,7 @@ add_subdirectory(src/pq)
add_subdirectory(src/common)
add_subdirectory(src/machinery)
add_subdirectory(lib/pbl)
add_subdirectory(lib/glob2regex)
#Actions
add_subdirectory(src/main)

View file

@ -0,0 +1,30 @@
# Build script for the glob2regex library target.
cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
project(glob2regex VERSION 0.1.0 LANGUAGES CXX)

find_package(Boost 1.53.0 REQUIRED)

add_library(${PROJECT_NAME}
    src/glob2regex.cpp
    src/glob_ast.cpp
    src/render_ast.cpp
)

# Boost headers are added as SYSTEM includes and are not propagated to
# consumers of this target.
target_include_directories(${PROJECT_NAME} SYSTEM
    PRIVATE ${Boost_INCLUDE_DIRS}
)

# "include" holds the public interface; "src" holds implementation headers.
target_include_directories(${PROJECT_NAME}
    PRIVATE src
    PUBLIC include
)

target_compile_features(${PROJECT_NAME}
    PUBLIC
        cxx_range_for
        cxx_defaulted_functions
        cxx_auto_type
        cxx_noexcept
        cxx_rvalue_references
)

target_compile_definitions(${PROJECT_NAME}
    PRIVATE BOOST_SPIRIT_USE_PHOENIX_V3=1
)

View file

@ -0,0 +1,27 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id98DC1C17239B4DD38C8697EC91BC3DA4
#define id98DC1C17239B4DD38C8697EC91BC3DA4
#include <string>
namespace g2r {
// Converts the glob pattern parGlob into a regular expression and returns
// the regular expression as a string.
std::string convert ( const std::string& parGlob );
} //namespace g2r
#endif

View file

@ -0,0 +1,31 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "glob2regex/glob2regex.hpp"
#include "glob_ast.hpp"
#include "render_ast.hpp"
#include <vector>
namespace g2r {
    // Converts a glob into a regex in two steps: the glob is first parsed
    // into an AST, then that AST is rendered as a regular expression string.
    std::string convert (const std::string& parGlob) {
        return render_ast(make_ast(parGlob));
    }
} //namespace g2r

View file

@ -0,0 +1,108 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "glob_ast.hpp"
#include <boost/spirit/include/qi.hpp>
//#include <boost/spirit/include/qi_core.hpp>
//#include <boost/spirit/include/qi_parse.hpp>
//#include <boost/spirit/include/qi_lit.hpp>
//#include <boost/spirit/include/qi_char_.hpp>
//#include <boost/spirit/include/qi_plus.hpp>
//#include <boost/spirit/include/qi_matches.hpp>
//#include <boost/spirit/include/qi_raw.hpp>
//#include <boost/spirit/include/qi_grammar.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/phoenix/object/construct.hpp>
#include <boost/phoenix/stl/container.hpp>
#include <boost/phoenix/bind/bind_member_function.hpp>
#include <boost/phoenix/operator.hpp>
namespace qi = boost::spirit::qi;
namespace g2r {
namespace {
// Boost.Spirit Qi grammar that parses a glob pattern into an AstType.
// Iterator is the iterator type of the text being parsed. The rules
// themselves are defined in the constructor.
template <typename Iterator>
struct GlobGrammar : qi::grammar<Iterator, AstType()> {
GlobGrammar ( void );
~GlobGrammar ( void ) = default;
// Entry rule: yields the whole sequence of nodes.
qi::rule<Iterator, AstType()> start;
// "{...}" containing comma separated sub-patterns.
qi::rule<Iterator, GlobAlternation()> alternation;
// Character set, optionally negated.
qi::rule<Iterator, GlobGroup()> group;
// Run of plain (or escaped) characters.
qi::rule<Iterator, std::string()> literal;
// Comma separated list of sub-patterns, each one a sequence of nodes.
qi::rule<Iterator, std::vector<std::vector<GlobNode>>()> comma_list;
// Comma separated list of single characters, collected into one string.
qi::rule<Iterator, std::string()> single_char_comma_list;
// Backslash followed by a special character; yields that character.
qi::rule<Iterator, char()> escaped_glob;
// Wildcards: "**", "*" and "?".
qi::rule<Iterator, GlobJolly()> jolly;
};
// Defines every rule of the grammar and registers `start` as the entry rule.
template <typename Iterator>
GlobGrammar<Iterator>::GlobGrammar() :
GlobGrammar<Iterator>::base_type(start)
{
using boost::spirit::qi::lit;
using boost::spirit::qi::char_;
using boost::spirit::qi::matches;
using boost::spirit::qi::eps;
using boost::spirit::qi::string;
using boost::spirit::qi::as_string;
using boost::spirit::qi::attr;
using boost::spirit::qi::repeat;
using boost::spirit::qi::inf;
// Characters that cannot appear in a literal unless they are preceded by
// a backslash. Note that the list also contains the space character.
static const char* const special_char_list = "{}[]*\\+? ";
// Typed constants so that attr() produces uint16_t attributes for the
// GlobJolly length fields.
const uint16_t uint16_zero = 0;
const uint16_t uint16_one = 1;
// A glob is any number of nodes; for each node the alternatives are tried
// in the order group, alternation, literal, jolly.
start = *(group | alternation | literal | jolly);
// One or more sub-patterns separated by ','.
comma_list = start % ",";
// One or more single non-special characters separated by ','.
single_char_comma_list = ~char_(special_char_list) % ",";
// '{' comma_list '}'.
// NOTE(review): the leading eps is presumably the usual Spirit workaround
// for rules whose attribute is a single-member adapted struct - confirm.
alternation = eps >> lit("{") >> comma_list >> "}";
// Two accepted forms:
//  - "[...]" with an optional leading '!' (stored in `negated` through
//    matches[]) and an optional leading ']' taken as part of the set;
//  - "{a,b,c}" made of single characters only, stored as a non-negated
//    group.
// NOTE(review): in the first form ~char_(']') also accepts a backslash, so
// the escaped_glob alternative looks unreachable there - verify.
group =
(lit("[") >> matches[lit("!")] >> as_string[-string("]") >> *(~char_(']') | escaped_glob)] >> "]") |
(attr(false) >> lit("{") >> single_char_comma_list >> lit("}"));
// One or more characters, each either non-special or an escaped special.
literal = +(~char_(special_char_list) | escaped_glob);
// A backslash followed by one of the special characters; only the special
// character contributes to the attribute.
escaped_glob = lit("\\") >> char_(special_char_list);
// Wildcards, as (match_length_min, match_length_max, match_slash):
//  "**" -> (0, 0, true), "*" -> (0, 0, false), "?" -> (1, 1, false).
// "**" is listed first so that it is tried before the single "*".
jolly = (attr(uint16_zero) >> attr(uint16_zero) >> attr(true) >> "**") |
(attr(uint16_zero) >> attr(uint16_zero) >> attr(false) >> "*") |
(attr(uint16_one) >> attr(uint16_one) >> attr(false) >> "?")
;
}
} //unnamed namespace
// Parses the glob pattern parGlob with GlobGrammar and returns the resulting
// AST (a sequence of GlobNode).
//
// The function produces no output: the debug trace that used to be written
// to std::cout on every call has been removed, since it polluted the
// standard output of every program using the library and relied on
// <iostream> being included indirectly.
//
// NOTE(review): as before, the outcome of the parse is not reported to the
// caller. If the grammar stops before the end of parGlob the nodes parsed so
// far are returned and the remaining input is ignored - decide whether this
// should become an error.
AstType make_ast (const std::string& parGlob) {
    GlobGrammar<std::string::const_iterator> gramm;
    // qi::parse() advances this iterator up to the first unconsumed character.
    auto glob_beg = parGlob.cbegin();
    const auto glob_end = parGlob.cend();
    AstType glob_ast;

    const bool parse_ret = boost::spirit::qi::parse(
        glob_beg,
        glob_end,
        gramm,
        glob_ast
    );
    // Deliberately unused for now, see the note above.
    static_cast<void>(parse_ret);

    return glob_ast;
}
} //namespace g2r

View file

@ -0,0 +1,29 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id4E6A756CA0A94213A90A40C567F51824
#define id4E6A756CA0A94213A90A40C567F51824
#include "glob_node_structs.hpp"
#include <vector>
#include <string>
namespace g2r {
// Parses the glob pattern parGlob and returns its AST, a sequence of
// GlobNode.
AstType make_ast ( const std::string& parGlob );
} //namespace g2r
#endif

View file

@ -0,0 +1,106 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id74048277752B4B85935C50F117F65DBE
#define id74048277752B4B85935C50F117F65DBE
#include <boost/fusion/adapted/struct.hpp>
#include <boost/variant/variant.hpp>
#include <boost/variant/recursive_wrapper.hpp>
#include <vector>
#include <string>
#include <ciso646>
#include <cstdint>
namespace g2r {
// Forward declaration: GlobAlternation is defined after GlobNode, which it
// contains.
struct GlobAlternation;
// A set of characters, as in "[abc]" or "[!abc]".
struct GlobGroup {
// True when the set is negated.
bool negated;
// The characters making up the set.
std::string characters;
};
// A wildcard node of the glob AST.
//
// match_length_min and match_length_max describe how many characters the
// wildcard matches, match_slash whether it is allowed to match across '/'.
// NOTE(review): a match_length_max of 0 appears to mean "no upper bound"
// (it is what "*" and "**" produce) - confirm.
//
// Members have default initializers so that a default-constructed GlobJolly
// has a well-defined value (0, 0, false) instead of indeterminate members.
struct GlobJolly {
    GlobJolly ( void ) = default;
    constexpr GlobJolly ( const GlobJolly& ) = default;
    // m: minimum match length, M: maximum match length, s: match slashes.
    constexpr GlobJolly ( uint16_t m, uint16_t M, bool s) :
        match_length_min(m),
        match_length_max(M),
        match_slash(s)
    {
    }

    // Strict weak ordering, lexicographic on
    // (match_slash, match_length_max, match_length_min), with
    // match_slash == false ordered before match_slash == true.
    // Written as a single return statement so that it remains a valid
    // C++11 constexpr function.
    constexpr bool operator< ( GlobJolly o ) const {
        return
            (match_slash != o.match_slash) ?
                o.match_slash :
            (match_length_max != o.match_length_max) ?
                (match_length_max < o.match_length_max) :
                (match_length_min < o.match_length_min);
    }

    uint16_t match_length_min = 0;
    uint16_t match_length_max = 0;
    bool match_slash = false;
};
} //namespace g2r
// Expose GlobGroup and GlobJolly to Boost.Fusion as sequences of their
// members, listed in declaration order.
BOOST_FUSION_ADAPT_STRUCT(
g2r::GlobGroup,
(bool, negated)
(std::string, characters)
);
BOOST_FUSION_ADAPT_STRUCT(
g2r::GlobJolly,
(uint16_t, match_length_min)
(uint16_t, match_length_max)
(bool, match_slash)
);
namespace g2r {
// A single node of the glob AST: an alternation, a character group, a
// literal string or a wildcard. GlobAlternation is still incomplete at this
// point, hence the recursive_wrapper.
using GlobNode = boost::variant<
boost::recursive_wrapper<GlobAlternation>,
GlobGroup,
std::string,
GlobJolly
>;
// A list of alternatives, each alternative being itself a sequence of nodes.
struct GlobAlternation {
std::vector<std::vector<GlobNode>> alternatives;
};
// A whole glob: a sequence of nodes.
using AstType = std::vector<GlobNode>;
} //namespace g2r
// Expose GlobAlternation to Boost.Fusion as a one-element sequence.
BOOST_FUSION_ADAPT_STRUCT(
g2r::GlobAlternation,
(std::vector<std::vector<g2r::GlobNode>>, alternatives)
);
// Compile-time checks of GlobJolly::operator<. The ordering compares
// match_slash first, then match_length_max, then match_length_min.
static_assert(g2r::GlobJolly(1, 2, false) < g2r::GlobJolly(0, 0, true), "Wrong less than");
static_assert(not (g2r::GlobJolly(1, 2, false) < g2r::GlobJolly(1, 2, false)), "Wrong less than");
static_assert(g2r::GlobJolly(10, 2, false) < g2r::GlobJolly(0, 3, false), "Wrong less than");
static_assert(g2r::GlobJolly(10, 2, true) < g2r::GlobJolly(0, 3, true), "Wrong less than");
static_assert(not (g2r::GlobJolly(10, 3, false) < g2r::GlobJolly(0, 2, false)), "Wrong less than");
static_assert(not (g2r::GlobJolly(10, 3, true) < g2r::GlobJolly(0, 2, true)), "Wrong less than");
static_assert(g2r::GlobJolly(10, 3, false) < g2r::GlobJolly(11, 3, false), "Wrong less than");
static_assert(g2r::GlobJolly(10, 3, true) < g2r::GlobJolly(11, 3, true), "Wrong less than");
#endif

View file

@ -0,0 +1,73 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "render_ast.hpp"
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <sstream>
namespace ka = boost::spirit::karma;
namespace g2r {
namespace {
// Boost.Spirit Karma grammar that generates regular expression text from an
// AstType. Iterator is the output iterator type. The rules and the symbol
// tables are filled in by the constructor.
template <typename Iterator>
struct RegexGen : ka::grammar<Iterator, AstType()> {
RegexGen ( void );
// Entry rule: emits every node of the AST in sequence.
boost::spirit::karma::rule<Iterator, AstType()> start;
boost::spirit::karma::rule<Iterator, GlobAlternation()> alternation;
boost::spirit::karma::rule<Iterator, GlobGroup()> group;
boost::spirit::karma::rule<Iterator, std::string()> literal;
boost::spirit::karma::rule<Iterator, GlobJolly()> jolly;
// Text emitted for GlobGroup::negated.
boost::spirit::karma::symbols<bool, const char*> negate_group;
// Characters that are emitted preceded by a backslash.
boost::spirit::karma::symbols<char, const char*> special_char;
// Regex text for each known wildcard.
boost::spirit::karma::symbols<GlobJolly, const char*> jolly_dic;
};
// Fills the symbol tables and defines the generator rules; `start` is
// registered as the entry rule.
template <typename Iterator>
RegexGen<Iterator>::RegexGen() :
RegexGen<Iterator>::base_type(start)
{
using ka::lit;
using ka::char_;
using ka::eps;
using ka::symbols;
// A negated group starts with '^', a regular one emits nothing extra.
negate_group.add(true, "^")(false, "");
// Each of these characters is emitted with a backslash in front of it.
special_char.add('.', "\\.")('*', "\\*")('\\', "\\\\")('?', "\\?")
('(', "\\(")(')', "\\)")('[', "\\[")(']', "\\]")('{', "\\{")
('}', "\\}")('$', "\\$")('^', "\\^")('+', "\\+")('|', "\\|");
// Wildcards, keyed by (match_length_min, match_length_max, match_slash):
//  (0, 0, false) -> "[^/]*", (1, 1, false) -> "[^/]", (0, 0, true) -> ".*".
jolly_dic.add(GlobJolly(0, 0, false), "[^/]*")
(GlobJolly(1, 1, false), "[^/]")
(GlobJolly(0, 0, true), ".*");
// Every node is emitted by the first of these generators that accepts it.
start = *(jolly | alternation | group | literal);
// "[", the optional "^", then every character (escaped when it is listed
// in special_char, verbatim otherwise), then "]".
group = "[" << negate_group << *(special_char | char_) << "]";
// Non-capturing group with the alternatives separated by '|'.
// NOTE(review): the leading eps is presumably the usual Spirit workaround
// for single-member adapted structs - confirm.
alternation = eps << "(?:" << (start % '|') << ")";
// Every character, escaped when it is listed in special_char.
literal = *(special_char | char_);
// Looks the wildcard up in jolly_dic and emits the associated text.
jolly = eps << jolly_dic;
}
} //unnamed namespace
std::string render_ast (const AstType& parAst) {
RegexGen<boost::spirit::ostream_iterator> gramm;
std::ostringstream oss;
oss << ka::format(gramm, parAst);
oss << '$';
return oss.str();
}
} //namespace g2r

View file

@ -0,0 +1,29 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id0F110FDE94C941F1A287D9A3918377FC
#define id0F110FDE94C941F1A287D9A3918377FC
#include "glob_node_structs.hpp"
#include <vector>
#include <string>
namespace g2r {
// Renders the glob AST parAst as a regular expression and returns it as a
// string.
std::string render_ast ( const AstType& parAst );
} //namespace g2r
#endif

View file

@ -3,6 +3,7 @@ project(${bare_name}-test CXX)
add_executable(${PROJECT_NAME}
test_diriterator.cpp
test_guess_content_type.cpp
test_glob2regex.cpp
)
target_include_directories(${PROJECT_NAME} SYSTEM
@ -15,6 +16,7 @@ target_link_libraries(${PROJECT_NAME}
PRIVATE ${bare_name}-machinery
PRIVATE gtest
PRIVATE gtest_main
PRIVATE glob2regex
)
add_test(

View file

@ -0,0 +1,29 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "glob2regex/glob2regex.hpp"
#include <gtest/gtest.h>
#include <string>
// Checks the regex produced by g2r::convert() for a simple glob: the '*'
// becomes "[^/]*", the '.' is escaped and a '$' is appended.
TEST(glob2regex, convert) {
    const std::string expected("[^/]*\\.ogg$");
    const std::string input("*.ogg");

    const std::string actual = g2r::convert(input);
    EXPECT_EQ(expected, actual);
}