diff --git a/src/main/CMakeLists.txt b/src/main/CMakeLists.txt
index 5954f6f..f86ff13 100644
--- a/src/main/CMakeLists.txt
+++ b/src/main/CMakeLists.txt
@@ -6,11 +6,12 @@ add_executable(${PROJECT_NAME}
main.c
findactions.c
builtin_feats.c
+ damerau_levenshtein.c
)
target_include_directories(${PROJECT_NAME}
PRIVATE ${CMAKE_SOURCE_DIR}/include
- PRIVATE ${CMAKE_SOURCE_DIR}/lib/pbl/pbl/src
+ PRIVATE ${CMAKE_SOURCE_DIR}/lib/pbl/pbl/src/src
)
target_link_libraries(${PROJECT_NAME}
diff --git a/src/main/damerau_levenshtein.c b/src/main/damerau_levenshtein.c
new file mode 100644
index 0000000..25dabe4
--- /dev/null
+++ b/src/main/damerau_levenshtein.c
@@ -0,0 +1,248 @@
+/* Copyright (c) 2012 Kevin L. Stern
+ * Copyright (c) 2016 Michele Santullo
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+/*
+ * See
+ * http://software-and-algorithms.blogspot.ca/2012/09/damerau-levenshtein-edit-distance.html
+ * and
+ * https://github.com/KevinStern/software-and-algorithms/blob/master/src/main/java/blogspot/software_and_algorithms/stern_library/string/DamerauLevenshteinAlgorithm.java
+ */
+
+
+/**
+ * The Damerau-Levenshtein Algorithm is an extension to the Levenshtein
+ * Algorithm which solves the edit distance problem between a source string and
+ * a target string with the following operations:
+ *
+ *
+ * - Character Insertion
+ * - Character Deletion
+ * - Character Replacement
+ * - Adjacent Character Swap
+ *
+ *
+ * Note that the adjacent character swap operation is an edit that may be
+ * applied when two adjacent characters in the source string match two adjacent
+ * characters in the target string, but in reverse order, rather than a general
+ * allowance for adjacent character swaps.
+ *
+ *
+ * This implementation allows the client to specify the costs of the various
+ * edit operations with the restriction that the cost of two swap operations
+ * must not be less than the cost of a delete operation followed by an insert
+ * operation. This restriction is required to preclude two swaps involving the
+ * same character being required for optimality which, in turn, enables a fast
+ * dynamic programming solution.
+ *
+ *
+ * The running time of the Damerau-Levenshtein algorithm is O(n*m) where n is
+ * the length of the source string and m is the length of the target string.
+ * This implementation consumes O(n*m) space.
+ *
+ * @author Kevin L. Stern
+ */
+
+#include "damerau_levenshtein.h"
+#include "pbl_wrapper.h"
+#include <assert.h>
+#include <iso646.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+//min/max that evaluate each argument exactly once (GNU statement expressions + __typeof__). See: http://stackoverflow.com/questions/3437404/min-and-max-in-c
+#define min(a,b) \
+ ({ __typeof__ (a) _a = (a); \
+ __typeof__ (b) _b = (b); \
+ _a < _b ? _a : _b; })
+#define max(a,b) \
+ ({ __typeof__ (a) _a = (a); \
+ __typeof__ (b) _b = (b); \
+ _a > _b ? _a : _b; })
+
+typedef wchar_t Character; /* key type used by insert_pair() and get_value() for the character map */
+
+/* Store parValue under parKey in parMap; a failed pblMapAdd trips the assert. */
+static void insert_pair (PblMap* parMap, Character parKey, int parValue) {
+    int add_status;
+
+    add_status = pblMapAdd(
+        parMap, &parKey, sizeof(parKey), &parValue, sizeof(parValue)
+    );
+    /* the call is treated as successful for any non-negative result */
+    assert(add_status >= 0);
+}
+
+static int get_value (PblMap* parMap, Character parKey) { /* stored int for parKey, or -1 if absent */
+    size_t stored_len;
+    const int* const stored = pblMapGet(parMap, &parKey, sizeof(parKey), &stored_len);
+    assert(NULL == stored or sizeof(*stored) == stored_len);
+    return (NULL == stored ? -1 : *stored);
+}
+
+/**
+ * Distance between two NUL-terminated strings: both lengths are taken with
+ * strlen() and the work is delegated to damerau_levenshtein_with_size().
+ */
+int damerau_levenshtein (
+    const char* parSource,
+    const char* parTarget,
+    int parDeleteCost,
+    int parInsertCost,
+    int parReplaceCost,
+    int parSwapCost
+)
+{
+    const size_t source_len = strlen(parSource);
+    const size_t target_len = strlen(parTarget);
+    return damerau_levenshtein_with_size(
+        parSource, source_len, parTarget, target_len,
+        parDeleteCost, parInsertCost, parReplaceCost, parSwapCost
+    );
+}
+
+/**
+ * Compute the Damerau-Levenshtein distance between the first parSourceLen
+ * characters of parSource and the first parTargetLen characters of parTarget.
+ *
+ * The dynamic-programming table is a flat array: the cell for source index i
+ * and target index j lives at table[i + j * source_len] and holds the distance
+ * between parSource[0..i] and parTarget[0..j] (both inclusive).
+ *
+ * @param parSource      source string; must not be NULL
+ * @param parSourceLen   number of characters of parSource to consider
+ * @param parTarget      target string; must not be NULL
+ * @param parTargetLen   number of characters of parTarget to consider
+ * @param parDeleteCost  cost of deleting one character from the source
+ * @param parInsertCost  cost of inserting one character into the source
+ * @param parReplaceCost cost of replacing one character
+ * @param parSwapCost    cost of swapping two adjacent characters
+ * @return the edit distance, or -1 when 2 * parSwapCost is less than
+ *         parInsertCost + parDeleteCost, when the table size does not fit in
+ *         an int, or when an allocation fails
+ */
+int damerau_levenshtein_with_size (
+    const char* parSource,
+    size_t parSourceLen,
+    const char* parTarget,
+    size_t parTargetLen,
+    int parDeleteCost,
+    int parInsertCost,
+    int parReplaceCost,
+    int parSwapCost
+)
+{
+    assert(parSource);
+    assert(parTarget);
+
+    /*
+     * Required to facilitate the premise to the algorithm that two swaps of the
+     * same character are never required for optimality.
+     */
+    if (2 * parSwapCost < parInsertCost + parDeleteCost)
+        return -1;
+
+    /* An empty side leaves only pure insertions or pure deletions. */
+    if (0 == parSourceLen)
+        return (int)(parTargetLen * parInsertCost);
+    if (0 == parTargetLen)
+        return (int)(parSourceLen * parDeleteCost);
+
+    /* Cell indices are computed in int, so the cell count must fit in one. */
+    if (parSourceLen > (size_t)INT_MAX / parTargetLen)
+        return -1;
+    const int source_len = (int)parSourceLen;
+    const int target_len = (int)parTargetLen;
+
+    /* calloc zero-fills, so cell (0, 0) already reads 0 when the first characters match. */
+    int* const table = calloc((size_t)source_len * (size_t)target_len, sizeof(*table));
+    PblMap* const sourceIndexByCharacter = pblMapNewHashMap();
+    if (NULL == table or NULL == sourceIndexByCharacter) {
+        free(table);
+        if (sourceIndexByCharacter)
+            pblMapFree(sourceIndexByCharacter);
+        return -1;
+    }
+
+    if (parSource[0] != parTarget[0])
+        table[0] = min(parReplaceCost, parDeleteCost + parInsertCost);
+    insert_pair(sourceIndexByCharacter, parSource[0], 0);
+
+    /* First column: parSource[0..i] against the single character parTarget[0]. */
+    for (int i = 1; i < source_len; ++i) {
+        const int delete_distance = table[i - 1] + parDeleteCost;
+        const int insert_distance = (i + 1) * parDeleteCost + parInsertCost;
+        const int match_distance = i * parDeleteCost +
+            (parSource[i] == parTarget[0] ? 0 : parReplaceCost);
+        table[i] = min(min(delete_distance, insert_distance), match_distance);
+    }
+
+    /* First row: the single character parSource[0] against parTarget[0..j]. */
+    for (int j = 1; j < target_len; ++j) {
+        const int delete_distance = (j + 1) * parInsertCost + parDeleteCost;
+        const int insert_distance = table[(j - 1) * source_len] + parInsertCost;
+        const int match_distance = j * parInsertCost +
+            (parSource[0] == parTarget[j] ? 0 : parReplaceCost);
+        table[j * source_len] = min(
+            min(delete_distance, insert_distance), match_distance
+        );
+    }
+
+    /* Remaining cells: best of delete, insert, match/replace and adjacent swap. */
+    for (int i = 1; i < source_len; ++i) {
+        /* Highest target index in this row, so far, whose character equals parSource[i]. */
+        int maxSourceLetterMatchIndex = (parSource[i] == parTarget[0] ? 0 : -1);
+        for (int j = 1; j < target_len; ++j) {
+            /* Source index recorded for parTarget[j] by an earlier row, or -1. */
+            const int i_swap = get_value(sourceIndexByCharacter, parTarget[j]);
+            const int j_swap = maxSourceLetterMatchIndex;
+            const int delete_distance = table[(i - 1) + j * source_len] + parDeleteCost;
+            const int insert_distance = table[i + (j - 1) * source_len] + parInsertCost;
+            int match_distance = table[(i - 1) + (j - 1) * source_len];
+            int swap_distance = INT_MAX;
+
+            if (parSource[i] != parTarget[j])
+                match_distance += parReplaceCost;
+            else
+                maxSourceLetterMatchIndex = j;
+
+            if (-1 != i_swap and -1 != j_swap) {
+                int pre_swap_cost = 0;
+                if (0 != i_swap or 0 != j_swap)
+                    pre_swap_cost = table[max(0, i_swap - 1) + max(0, j_swap - 1) * source_len];
+                swap_distance = pre_swap_cost + (i - i_swap - 1) * parDeleteCost +
+                    (j - j_swap - 1) * parInsertCost + parSwapCost;
+            }
+            table[i + j * source_len] = min(
+                min(min(delete_distance, insert_distance), match_distance),
+                swap_distance
+            );
+        }
+        insert_pair(sourceIndexByCharacter, parSource[i], i);
+    }
+
+    const int retval = table[(source_len - 1) + (target_len - 1) * source_len];
+    free(table);
+    pblMapFree(sourceIndexByCharacter);
+    return retval;
+}
diff --git a/src/main/damerau_levenshtein.h b/src/main/damerau_levenshtein.h
new file mode 100644
index 0000000..a16cccb
--- /dev/null
+++ b/src/main/damerau_levenshtein.h
@@ -0,0 +1,44 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id88738025C6B24BDEB604A5AE3C36EE8D
+#define id88738025C6B24BDEB604A5AE3C36EE8D
+
+#include "helpers/compatibility.h"
+#include "stddef.h"
+
+int damerau_levenshtein ( /* edit distance between two NUL-terminated strings; -1 if 2*swap < insert+delete */
+ const char* parSource, /* NUL-terminated; its length is taken with strlen() */
+ const char* parTarget, /* NUL-terminated; its length is taken with strlen() */
+ int parDeleteCost, /* cost of a character deletion */
+ int parInsertCost, /* cost of a character insertion */
+ int parReplaceCost, /* cost of a character replacement */
+ int parSwapCost /* cost of an adjacent character swap */
+) a_pure;
+
+int damerau_levenshtein_with_size ( /* same as damerau_levenshtein(), with explicit lengths; -1 if 2*swap < insert+delete */
+ const char* parSource, /* must not be NULL (asserted) */
+ size_t parSourceLen, /* number of characters of parSource to compare */
+ const char* parTarget, /* must not be NULL (asserted) */
+ size_t parTargetLen, /* number of characters of parTarget to compare */
+ int parDeleteCost, /* cost of a character deletion */
+ int parInsertCost, /* cost of a character insertion */
+ int parReplaceCost, /* cost of a character replacement */
+ int parSwapCost /* cost of an adjacent character swap */
+) a_pure;
+
+#endif
diff --git a/src/main/main.c b/src/main/main.c
index 872fbbf..a0376c4 100644
--- a/src/main/main.c
+++ b/src/main/main.c
@@ -21,6 +21,7 @@
#include "findactions.h"
#include "helpers/lengthof.h"
#include "builtin_feats.h"
+#include "damerau_levenshtein.h"
#include
#include
#include
@@ -55,6 +56,7 @@ static size_t foreach_avail_action ( int(*parFunc)(const char*, const void*), ch
static int printf_stream ( const char* parMsg, const void* parStream );
static int printf_stream_inplace ( const char* parMsg, const void* parPrintContext );
static int same_action ( const char* parAction1, const void* parAction2 );
+static int find_similar ( const char* parAction, const void* parUserInput );
static void print_usage ( void );
static int manage_commandline ( int parArgc, char* parArgv[], char** parActions, size_t parActionCount, int* parShouldQuit );
@@ -88,8 +90,17 @@ int main (int parArgc, char* parArgv[]) {
selected_action = foreach_avail_action(&same_action, actions, actions_count, specified_action);
if (actions_count == selected_action) {
- fprintf(stderr, "Unrecognized action \"%s\" - available actions are:\n", specified_action);
- foreach_avail_action(&printf_stream, actions, actions_count, stderr);
+ //Find a possible misspelling and show a hint to the user if any
+ selected_action = foreach_avail_action(&find_similar, actions, actions_count, specified_action);
+ if (selected_action < actions_count) {
+ fprintf(stderr, "Unrecognized action \"%s\" - maybe you meant \"%s\"?\n",
+ specified_action,
+ get_actionname(actions[selected_action])
+ );
+ }
+ else {
+ fprintf(stderr, "Unrecognized action \"%s\"\n", specified_action);
+ }
free_actions(actions, actions_count);
return 2;
}
@@ -177,6 +188,14 @@ static int same_action (const char* parAction1, const void* parAction2) {
}
}
+/* foreach_avail_action callback: 1 when parAction is at most MaxTypoDistance edits from parUserInput, else 0. */
+static int find_similar (const char* parAction, const void* parUserInput) {
+    enum { MaxTypoDistance = 2 };
+    const int distance = damerau_levenshtein((const char*)parUserInput, parAction, 1, 1, 1, 1);
+    /* damerau_levenshtein() signals failure with -1; never report that as a match */
+    return (0 <= distance && distance <= MaxTypoDistance) ? 1 : 0;
+}
+
static void print_usage() {
printf("--help, -h - show this help\n");
printf("--builtin, -b - show build info\n");
diff --git a/src/main/pbl_wrapper.h b/src/main/pbl_wrapper.h
new file mode 100644
index 0000000..6db1714
--- /dev/null
+++ b/src/main/pbl_wrapper.h
@@ -0,0 +1,25 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef idDB955D222C4A4AF5B9BD1A4A0BBDD9E3
+#define idDB955D222C4A4AF5B9BD1A4A0BBDD9E3
+
+#include
+#include
+#include "pbl.h"
+
+#endif