refactor tokenizer with delimiter change support

2015-04-24 00:35:13 +02:00 · 2015-04-24 00:35:13 +02:00 · 8a32d98443
commit 8a32d98443
parent 7bdde0783f
14 changed files with 65 additions and 89 deletions
--- a/src/template_type.cpp
+++ b/src/template_type.cpp
@ -7,55 +7,40 @@ template_type::template_type(const std::string& str) {
  strip_whitespace();
 }

-void template_type::tokenize(const std::string& t) {
-  std::string delim_start{"{{"};
-  std::string delim_end{"}}"};
-  std::string::const_iterator tok_end, tok_start = t.begin();
-  parse_state pstate = parse_state::start;
-  unsigned int del_pos = 0;
-  for (std::string::const_iterator it = t.begin(); it != t.end(); ++it) {
-    if (pstate == parse_state::start) {
-      if (*it == delim_start[0]) {
-        pstate = parse_state::in_del_start;
-        tok_end = it;
-        del_pos = 1;
-      } else if (*it == '\n') {
-        tokens.push_back({{tok_start, it + 1}});
-        tok_start = it + 1;
+// Appends one token per line of [begin, end); a line keeps its '\n'.
+void template_type::process_text(citer begin, citer end) {
+  for (citer line = begin, it = begin; it != end; line = it) {
+    while (it != end && *it != '\n')  // advance to the line break
+      ++it;
+    if (it != end)
+      ++it;  // the token includes the newline itself
+    tokens.push_back({{line, it}});
+  }
+}
+
+void template_type::tokenize(const std::string& tmplt) {  // split tmplt into text runs and delimited tags
+  std::string open{"{{"}, close{"}}"};  // active delimiters; an "=...=" tag below replaces them
+  citer beg = tmplt.begin();
+  for (unsigned long pos = 0; pos < tmplt.size();) {  // pos: start of the input not yet tokenized
+    auto to = tmplt.find(open, pos);  // to: next opening delimiter
+    auto tc = tmplt.find(close, (to == std::string::npos)?to:(to + 1));  // tc: closer after it (npos if no opener)
+    if (tc != std::string::npos && to != std::string::npos) {  // a complete tag was found
+      if (*(beg + to + open.size()) == '{' && *(beg + tc + close.size()) == '}')
+        ++tc;  // keep the extra '}' of "{{{x}}}"; NOTE(review): the 2nd test may read *end()
+      process_text(beg + pos, beg + to);  // text preceding the tag
+      pos = tc + close.size();  // resume after the closing delimiter
+      tokens.push_back({{beg + to, beg + tc + close.size()},  // the tag, with the delimiter lengths in use
+        open.size(), close.size()});
+      if (*(beg + to + open.size()) == '=' && *(beg + tc - 1) == '=') {  // "=...=" tag: switch delimiters
+        open = {beg + to + open.size() + 1, beg + tmplt.find(' ', to)};  // after '=' up to the first space; NOTE(review): find result unchecked
+        close = {beg + tmplt.find(' ', to) + 1, beg + tc - 1};  // after that space up to the trailing '='
      }
-    } else if (pstate == parse_state::in_del_start) {
-      if (*it == delim_start[del_pos] && ++del_pos == delim_start.size())
-        pstate = parse_state::in_del;
-      else
-        pstate = parse_state::start;
-    } else if (pstate == parse_state::in_del) {
-      if (*it == '{')
-        pstate = parse_state::in_esccontent;
-      else if (*it == delim_end[0] && (del_pos = 1))
-        pstate = parse_state::in_del_end;
-      else
-        pstate = parse_state::in_content;
-    } else if (pstate == parse_state::in_esccontent && *it == '}') {
-      pstate = parse_state::in_content;
-    } else if (pstate == parse_state::in_content && *it == delim_end[0]) {
-      pstate = parse_state::in_del_end;
-      del_pos = 1;
-    } else if (pstate == parse_state::in_del_end) {
-      if (*it == delim_end[del_pos] && ++del_pos == delim_end.size()) {
-        pstate = parse_state::start;
-        tokens.push_back({{tok_start, tok_end}});
-        tokens.push_back(
-            {{tok_end, it + 1},
-             delim_start.size(),
-             delim_end.size()});
-        tok_start = it + 1;
    } else {
-        pstate = parse_state::start;
+      process_text(beg + pos, tmplt.end());  // no complete tag left: the rest is text
+      pos = tc;  // tc is npos on this path, which ends the loop
    }
  }
 }
-  tokens.push_back({{tok_start, t.end()}});
-}

 void template_type::strip_whitespace() {
  auto line_begin = tokens.begin();
--- a/src/template_type.hpp
+++ b/src/template_type.hpp
@ -4,6 +4,7 @@
 #include <vector>

 #include "token.hpp"
+#include "utils.hpp"

 namespace mstch {

@ -16,12 +17,10 @@ class template_type {
  void operator<<(const token& token) { tokens.push_back(token); }

 private:
-  enum class parse_state {
-    start, in_del_start, in_del, in_content, in_esccontent, in_del_end
-  };
-  void tokenize(const std::string& str);
-  void strip_whitespace();
  std::vector<token> tokens;
+  void strip_whitespace();
+  void process_text(citer beg, citer end);
+  void tokenize(const std::string& tmplt);
 };

 }
--- a/src/token.cpp
+++ b/src/token.cpp
@ -19,7 +19,9 @@ token::token(const std::string& str, std::size_t left, std::size_t right):
    m_raw(str), m_eol(false), m_ws_only(false)
 {
  if (left != 0 && right != 0) {
-    if (str[left] == '{' && str[str.size() - right - 1] == '}') {
+    if (str[left] == '=' && str[str.size() - right - 1] == '=') {
+      m_type = type::delimiter_change;
+    } else if (str[left] == '{' && str[str.size() - right - 1] == '}') {
      m_type = type::unescaped_variable;
      m_name = {first_not_ws(str.begin() + left + 1, str.end() - right),
          first_not_ws(str.rbegin() + 1 + right, str.rend() - left) + 1};
--- a/src/token.hpp
+++ b/src/token.hpp
@ -8,7 +8,7 @@ class token {
 public:
  enum class type {
    text, variable, section_open, section_close, inverted_section_open,
-    unescaped_variable, comment, partial
+    unescaped_variable, comment, partial, delimiter_change
  };
  token(const std::string& str, std::size_t left = 0, std::size_t right = 0);
  type token_type() const { return m_type; };
--- a/test/data/changing_delimiters.hpp
+++ b/test/data/changing_delimiters.hpp
@ -0,0 +1,4 @@
+const mstch::node changing_delimiters_data = mstch::map{  // presumably for MSTCH_TEST(changing_delimiters) — TODO confirm
+  {"foo", std::string{"foooooooooooooo"}},
+  {"bar", std::string{"<b>bar!</b>"}}  // value contains HTML markup
+};
--- a/test/data/delimiter/changing_delimiters.mustache
+++ b/test/data/delimiter/changing_delimiters.mustache
--- a/test/data/delimiter/changing_delimiters.txt
+++ b/test/data/delimiter/changing_delimiters.txt
--- a/test/data/delimiter/changing_delimiters.js
+++ b/test/data/delimiter/changing_delimiters.js
@ -1,4 +0,0 @@
-({
-  "foo": "foooooooooooooo",
-  "bar": "<b>bar!</b>"
-})
--- a/test/data/delimiter/delimiters.js
+++ b/test/data/delimiter/delimiters.js
@ -1,6 +0,0 @@
-({
-  first: "It worked the first time.",
-  second: "And it worked the second time.",
-  third: "Then, surprisingly, it worked the third time.",
-  fourth: "Fourth time also fine!."
-})
--- a/test/data/delimiters.hpp
+++ b/test/data/delimiters.hpp
@ -0,0 +1,6 @@
+const mstch::node delimiters_data = mstch::map{  // presumably for MSTCH_TEST(delimiters) — TODO confirm
+  {"first", std::string{"It worked the first time."}},
+  {"second", std::string{"And it worked the second time."}},
+  {"third", std::string{"Then, surprisingly, it worked the third time."}},
+  {"fourth", std::string{"Fourth time also fine!."}}
+};
--- a/test/data/delimiter/delimiters.mustache
+++ b/test/data/delimiter/delimiters.mustache
--- a/test/data/delimiter/delimiters.txt
+++ b/test/data/delimiter/delimiters.txt
--- a/test/headerize.cpp
+++ b/test/headerize.cpp
@ -6,20 +6,10 @@
 #include <boost/program_options/variables_map.hpp>
 #include <boost/program_options/parsers.hpp>

-void wrap_code(std::istream& input, std::ostream& output) {
-  std::string line;
-  while (std::getline(input, line)) {
-    output << line;
-    if (!input.eof())
-      output << std::endl;
-  }
-  output << std::endl;
-}
-
 void wrap_string(std::istream& input, std::ostream& output,
    const std::string& variable_name)
 {
-  output << "const std::string " << variable_name << "{" << std::endl;;
+  output << "const std::string " << variable_name << "{\n";
  std::string line;
  while (std::getline(input, line)) {
    boost::replace_all(line, "\\", "\\\\");
@ -27,9 +17,9 @@ void wrap_string(std::istream& input, std::ostream& output,
    output << "  \"" << line;
    if (!input.eof())
      output << "\\n";
-    output << "\"" << std::endl;
+    output << "\"\n";
  }
-  output << "};" << std::endl;
+  output << "};\n";
 }

 int main(int argc, char* argv[]) {
@ -54,34 +44,32 @@ int main(int argc, char* argv[]) {
  }

  if (!vm.count("output")) {
-    std::cout << "Output file not set" << std::endl;
+    std::cerr << "Output file not set" << std::endl;
    return 1;
  }

  std::ofstream output(vm["output"].as<std::string>(), std::ios::out);

  if (vm.count("namespace"))
-    output << "namespace " << vm["namespace"].as<std::string>() << " {" << std::endl;
+    output << "namespace " << vm["namespace"].as<std::string>() << " {\n";

-  if (vm.count("input-string")) {
-    for (auto& string_filename: vm["input-string"].as<std::vector<std::string>>()) {
-      std::ifstream input(string_filename, std::ios::in);
-      wrap_string(input, output,
-          boost::replace_all_copy(string_filename, ".", "_"));
+  if (vm.count("input-string"))
+    for (auto& filename: vm["input-string"].as<std::vector<std::string>>()) {
+      std::ifstream input(filename, std::ios::in);
+      wrap_string(input, output, boost::replace_all_copy(filename, ".", "_"));
      input.close();
    }
-  }

-  if (vm.count("input-code")) {
-    for (auto& data_filename: vm["input-code"].as<std::vector<std::string>>()) {
-      std::ifstream input(data_filename, std::ios::in);
-      wrap_code(input, output);
+  if (vm.count("input-code"))
+    for (auto& filename: vm["input-code"].as<std::vector<std::string>>()) {
+      std::ifstream input(filename, std::ios::in);
+      output << std::string{(std::istreambuf_iterator<char>(input)),
+          std::istreambuf_iterator<char>()} << std::endl;
      input.close();
    }
-  }

  if (vm.count("namespace"))
-    output << "}" << std::endl;
+    output << "}\n";

  output.close();

--- a/test/test_main.cpp
+++ b/test/test_main.cpp
@ -20,9 +20,11 @@ MSTCH_TEST(array_of_strings)
 MSTCH_TEST(backslashes)
 MSTCH_TEST(bug_11_eating_whitespace)
 MSTCH_TEST(bug_length_property)
+MSTCH_TEST(changing_delimiters)
 MSTCH_TEST(comments)
 MSTCH_TEST(complex)
 MSTCH_TEST(context_lookup)
+MSTCH_TEST(delimiters)
 MSTCH_TEST(disappearing_whitespace)
 MSTCH_TEST(dot_notation)
 MSTCH_TEST(double_render)