From a9ff0924014ed2467a0c51c69f5bbfdb4981bcde Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Tue, 30 Jan 2018 10:39:33 +0000 Subject: [PATCH] WiP - do item counting in mstch variants correctly. --- map_form.txt | 85 +++++++++++++++++++++++++++++++++++++++++ src/scraplang/apply.cpp | 44 +++++++++++++++++++-- 2 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 map_form.txt diff --git a/map_form.txt b/map_form.txt new file mode 100644 index 0000000..894f85e --- /dev/null +++ b/map_form.txt @@ -0,0 +1,85 @@ +apply {{mustache_name}} to {{pages}} + A = /html/head/text() + struct B + C default("n/a") = //table[@class="wikitable sortable"]/tr/td[4]/a/text() + D default("0") = //table[@class="wikitable sortable"]/tr/td[3]/text() + struct E + F = /html/head/inner_names/text() + G = /html/head/inner_probabilities/text() + end + H = /html/head/inner_names/text() + end + I = /html/head/inner_names/text() +end + +==mustache_name +blah +==end + + +The above should result in the following: + +A[] +B[] --- C + --- D + --- E[] --- F + --- G + --- H +--- I[] + +For example, given these query results: +A[] = {a1, a2, a3} +C = c1 +D[] = {d1, d2} +F[] = {f1, f2, f3} +G = g1 +H = h1 +I = i1 + +then the complete result in tree form shall be: + +{ + A => [a1, a2, a3], + B => [ + { + C => c1, + D => d1, + E => [ + { + F => f1, + G => g1 + }, { + F => f2, + G => "" + }, { + F => f3, + G => "" + } + ], + H => h1 + }, { + C => "", + D => d2, + E => [ + { + F => f1, + G => g1 + }, { + F => f2, + G => "" + }, { + F => f3, + G => "" + } + ], + H => "" + } + ] +} + +Please note that: + +* arrays inside a struct turn the struct itself into an array, while its items become just single item values +* if a struct contains no arrays, then the struct shall not become an array - that is, a struct generates an array with as many elements as the largest element in the struct itself +* there are as many of any one struct as the size of the largest array inside it +* nested structs get 
duplicated in every outer struct they are part of; in the example above E has as many elements as there are items in F (3, the largest between F and G), and the whole array of E is duplicated in every element of B diff --git a/src/scraplang/apply.cpp b/src/scraplang/apply.cpp index b71266b..10e9575 100644 --- a/src/scraplang/apply.cpp +++ b/src/scraplang/apply.cpp @@ -173,8 +173,36 @@ namespace duck { namespace sl { MustacheEntry* m_current_mustache; }; - mstch::map to_mustache_dict_recursive (const EntryNode& parNode, std::string_view parSrc, XPathRunner& parRunner) { + struct ItemCountingVisitor : public boost::static_visitor { + template + std::size_t operator()(const T&) const { return 1; } + std::size_t operator()(const mstch::array& parItem) const { return parItem.size(); } + std::size_t operator()(const std::vector& parItem) const { return parItem.size(); } + }; + + std::size_t largest_array_size_in (mstch::map& parMap) { + typedef ItemCountingVisitor ITC; + using boost::apply_visitor; + + if (parMap.empty()) + return 0; + + return apply_visitor( + ITC(), + std::max_element(parMap.begin(), parMap.end(), [](const auto& a, const auto& b) { + return apply_visitor(ITC(), a.second) < apply_visitor(ITC(), b.second); + })->second + ); + } + + mstch::map to_mustache_dict_recursive ( + const EntryNode& parNode, + std::string_view parSrc, + XPathRunner& parRunner, + bool parMakeVecsSameSize + ) { mstch::map retval; + for (const XPathElement* xpath : parNode.xpaths) { assert(xpath); std::cout << "Running query for \"" << xpath->name << "\"\n"; @@ -198,8 +226,16 @@ namespace duck { namespace sl { for (auto& curr_struct : parNode.structs) { assert(not curr_struct.name.empty()); - retval[std::string(curr_struct.name)] = - to_mustache_dict_recursive(curr_struct, parSrc, parRunner); + + mstch::array extracted_struct; + auto new_struct = to_mustache_dict_recursive(curr_struct, parSrc, parRunner, false); + const std::size_t extracted_struct_size = 
largest_array_size_in(new_struct); + std::cout << "Largest array size in \"" << curr_struct.name << "\" = " << extracted_struct_size << '\n'; + for (auto&& itm : new_struct) { + + } + + // retval[std::string(curr_struct.name)] = } return retval; @@ -220,7 +256,7 @@ namespace duck { namespace sl { assert(false); //not implemented } - mstch::map curr_entry_map = to_mustache_dict_recursive(entry.second, src_url, parRunner); + mstch::map curr_entry_map = to_mustache_dict_recursive(entry.second, src_url, parRunner, false); curr_entry_map.merge(std::move(retval)); retval.swap(curr_entry_map); }