mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2025-02-19 12:04:54 +00:00
Merge remote-tracking branch 'gitan/master'
This commit is contained in:
commit
e6bfd0ecfd
17 changed files with 592 additions and 61 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -1,3 +1,6 @@
|
|||
[submodule "test/gtest"]
|
||||
path = test/gtest
|
||||
url = https://github.com/google/googletest.git
|
||||
[submodule "lib/pbl/pbl"]
|
||||
path = lib/pbl/pbl
|
||||
url = https://github.com/peterGraf/pbl.git
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
|
||||
cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
|
||||
set(bare_name "dindexer")
|
||||
project("${bare_name}-if" VERSION 0.1.4 LANGUAGES CXX C)
|
||||
list (APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules)
|
||||
|
@ -22,16 +22,23 @@ include(CPack)
|
|||
|
||||
option(DINDEXER_DEBUG_CFG_FILE "Enable to set the config file path to the build path" OFF)
|
||||
option(DINDEXER_WITH_MEDIA_AUTODETECT "Enable code that tries to autodetect the media type and sets --type automatically" ON)
|
||||
option(DINDEXER_NATIVE_RELEASE "Pass the -march=native flag to the compiler for release builds" OFF)
|
||||
if (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION_MAJOR EQUAL "5")
|
||||
option(DINDEXER_CXX11_ABI "Controls if _GLIBCXX_USE_CXX11_ABI gets set to 0 or not" ON)
|
||||
endif()
|
||||
if(DINDEXER_NATIVE_RELEASE)
|
||||
set(march_flag "-march=native")
|
||||
else()
|
||||
set(march_flag "")
|
||||
endif()
|
||||
set(DINDEXER_ACTIONS_PATH "${CMAKE_CURRENT_BINARY_DIR}/src" CACHE STRING "Actions search path")
|
||||
string(REGEX MATCH "[^/].*" ACTIONS_PATH_INSTALL "${DINDEXER_ACTIONS_PATH}")
|
||||
set(DB_OWNER_NAME "$ENV{USER}" CACHE STRING "Name that will be used as the DB owner name")
|
||||
set(PROJECT_VERSION_BETA "1")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++11 -Wall -Wextra -Wno-missing-field-initializers")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++11 -Wall -Wextra -O3 -fomit-frame-pointer -Wno-missing-field-initializers")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++11 -Wall -Wextra -Wno-missing-field-initializers -O0")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++11 -Wall -Wextra -fomit-frame-pointer -Wno-missing-field-initializers ${march_flag}")
|
||||
set(DINDEXER_PUB_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
|
||||
set(PBL_WITH_TESTS OFF)
|
||||
get_git_head_revision(GIT_REFSPEC PROJECT_VERSION_GIT)
|
||||
|
||||
if ("${DINDEXER_CONFIG_FILE}" STREQUAL "")
|
||||
|
@ -46,7 +53,7 @@ message(STATUS "Config file set to \"${DINDEXER_CONFIG_FILE}\"")
|
|||
find_package(Boost 1.53.0 REQUIRED COMPONENTS program_options)
|
||||
find_package(PostgreSQL 8.3 REQUIRED)
|
||||
find_package(YamlCpp 0.5.1 REQUIRED)
|
||||
import_libpqtypes_project("${PostgreSQL_INCLUDE_DIRS}")
|
||||
import_libpqtypes_project("${PostgreSQL_INCLUDE_DIRS}" "-O3 ${march_flag}")
|
||||
|
||||
add_library(${PROJECT_NAME} INTERFACE)
|
||||
add_library(${bare_name}-inc INTERFACE)
|
||||
|
@ -94,6 +101,7 @@ endif()
|
|||
add_subdirectory(src/pq)
|
||||
add_subdirectory(src/common)
|
||||
add_subdirectory(src/machinery)
|
||||
add_subdirectory(lib/pbl)
|
||||
|
||||
#Actions
|
||||
add_subdirectory(src/main)
|
||||
|
@ -102,6 +110,8 @@ add_subdirectory(src/delete)
|
|||
add_subdirectory(src/query)
|
||||
add_subdirectory(src/locate)
|
||||
add_subdirectory(src/navigate)
|
||||
|
||||
#Tests
|
||||
add_subdirectory(test/gtest)
|
||||
add_subdirectory(test/unit)
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
include(ExternalProject)
|
||||
|
||||
function (import_libpqtypes_project PostgreSQL_INCLUDE_DIRS)
|
||||
function (import_libpqtypes_project PostgreSQL_INCLUDE_DIRS RELEASE_C_FLAGS)
|
||||
#find_path(libpqtypes_base_path NAMES src/libpqtypes.h PATHS ${CMAKE_CURRENT_SOURCE_DIR}/lib NO_DEFAULT_PATH)
|
||||
file(GLOB_RECURSE libpqtypes_base_path "lib/**libpqtypes.h")
|
||||
|
||||
|
@ -13,7 +13,8 @@ function (import_libpqtypes_project PostgreSQL_INCLUDE_DIRS)
|
|||
DOWNLOAD_COMMAND ""
|
||||
SOURCE_DIR ${libpqtypes_base_path}
|
||||
PREFIX ${libpqtypes_prefix}
|
||||
CONFIGURE_COMMAND ${libpqtypes_base_path}/configure --prefix ${libpqtypes_prefix} "CPPFLAGS=${libpqtypes_inc_dirs}" --quiet
|
||||
CONFIGURE_COMMAND ${libpqtypes_base_path}/configure --prefix ${libpqtypes_prefix} "CPPFLAGS=${libpqtypes_inc_dirs}"
|
||||
"CFLAGS=$<$<CONFIG:Release>:${RELEASE_C_FLAGS}>" --quiet
|
||||
BUILD_COMMAND ${MAKE}
|
||||
BUILD_IN_SOURCE 0
|
||||
)
|
||||
|
|
|
@ -1,31 +1,40 @@
|
|||
if (CMAKE_COMPILER_IS_GNUCXX)
|
||||
exec_program(${CMAKE_CXX_COMPILER} ARGS --version OUTPUT_VARIABLE version_string)
|
||||
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" CMAKE_CXX_COMPILER_VERSION "${version_string}")
|
||||
string(REGEX MATCHALL "[0-9]+" version_nums "${CMAKE_CXX_COMPILER_VERSION}")
|
||||
list(GET version_nums 0 CMAKE_CXX_COMPILER_VERSION_MAJOR)
|
||||
list(GET version_nums 1 CMAKE_CXX_COMPILER_VERSION_MINOR)
|
||||
list(GET version_nums 2 CMAKE_CXX_COMPILER_VERSION_PATCH)
|
||||
function (set_compiler_ver spliced_version_var)
|
||||
set(whole_version "${${spliced_version_var}}")
|
||||
set(var_prefix "${spliced_version_var}")
|
||||
string(REGEX MATCHALL "[0-9]+" version_nums "${whole_version}")
|
||||
list(GET version_nums 0 compiler_MAJOR)
|
||||
list(GET version_nums 1 compiler_MINOR)
|
||||
list(GET version_nums 2 compiler_PATCH)
|
||||
set(${var_prefix}_MAJOR "${compiler_MAJOR}" PARENT_SCOPE)
|
||||
set(${var_prefix}_MINOR "${compiler_MINOR}" PARENT_SCOPE)
|
||||
set(${var_prefix}_PATCH "${compiler_PATCH}" PARENT_SCOPE)
|
||||
unset(compiler_MAJOR)
|
||||
unset(compiler_MINOR)
|
||||
unset(compiler_PATCH)
|
||||
unset(version_nums)
|
||||
unset(version_string)
|
||||
else()
|
||||
set(CMAKE_CXX_COMPILER_VERSION "0.0.0")
|
||||
set(CMAKE_CXX_COMPILER_VERSION_MAJOR "0")
|
||||
set(CMAKE_CXX_COMPILER_VERSION_MINOR "0")
|
||||
set(CMAKE_CXX_COMPILER_VERSION_PATCH "0")
|
||||
unset(whole_version)
|
||||
unset(var_prefix)
|
||||
endfunction()
|
||||
|
||||
if (NOT CMAKE_CXX_COMPILER_VERSION)
|
||||
if (CMAKE_COMPILER_IS_GNUCXX)
|
||||
exec_program(${CMAKE_CXX_COMPILER} ARGS --version OUTPUT_VARIABLE version_string)
|
||||
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" CMAKE_CXX_COMPILER_VERSION "${version_string}")
|
||||
unset(version_string)
|
||||
else()
|
||||
set(CMAKE_CXX_COMPILER_VERSION "0.0.0")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (CMAKE_COMPILER_IS_GNUCC)
|
||||
exec_program(${CMAKE_C_COMPILER} ARGS --version OUTPUT_VARIABLE version_string)
|
||||
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" CMAKE_C_COMPILER_VERSION "${version_string}")
|
||||
string(REGEX MATCHALL "[0-9]+" version_nums "${CMAKE_C_COMPILER_VERSION}")
|
||||
list(GET version_nums 0 CMAKE_C_COMPILER_VERSION_MAJOR)
|
||||
list(GET version_nums 1 CMAKE_C_COMPILER_VERSION_MINOR)
|
||||
list(GET version_nums 2 CMAKE_C_COMPILER_VERSION_PATCH)
|
||||
unset(version_nums)
|
||||
unset(version_string)
|
||||
else()
|
||||
set(CMAKE_C_COMPILER_VERSION "0.0.0")
|
||||
set(CMAKE_C_COMPILER_VERSION_MAJOR "0")
|
||||
set(CMAKE_C_COMPILER_VERSION_MINOR "0")
|
||||
set(CMAKE_C_COMPILER_VERSION_PATCH "0")
|
||||
if (NOT CMAKE_C_COMPILER_VERSION)
|
||||
if (CMAKE_COMPILER_IS_GNUCC)
|
||||
exec_program(${CMAKE_C_COMPILER} ARGS --version OUTPUT_VARIABLE version_string)
|
||||
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" CMAKE_C_COMPILER_VERSION "${version_string}")
|
||||
unset(version_string)
|
||||
else()
|
||||
set(CMAKE_C_COMPILER_VERSION "0.0.0")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_compiler_ver(CMAKE_CXX_COMPILER_VERSION)
|
||||
set_compiler_ver(CMAKE_C_COMPILER_VERSION)
|
||||
|
|
55
lib/pbl/CMakeLists.txt
Normal file
55
lib/pbl/CMakeLists.txt
Normal file
|
@ -0,0 +1,55 @@
|
|||
# Build script for the bundled PBL (Program Base Library) sources, which live
# in the "pbl" git submodule next to this file. Produces the static library
# target "pbl" and, optionally, the upstream test programs.
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
project(pbl C)

# Each configuration extends its own flag set. The Debug flags must not be
# derived from the Release ones, otherwise Debug builds silently inherit the
# Release optimisation/NDEBUG settings and lose their own defaults.
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Wall")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Wall -O3")

# Parent projects can turn this off to skip the upstream test programs.
option(PBL_WITH_TESTS "Enable building test programs" ON)

add_library(pbl STATIC
	pbl/src/src/pblStringBuilder.c
	pbl/src/src/pblPriorityQueue.c
	pbl/src/src/pblHeap.c
	pbl/src/src/pblMap.c
	pbl/src/src/pblSet.c
	pbl/src/src/pblList.c
	pbl/src/src/pblCollection.c
	pbl/src/src/pblIterator.c
	pbl/src/src/pblisam.c
	pbl/src/src/pblkf.c
	pbl/src/src/pblhash.c
	pbl/src/src/pbl.c
)

# The headers sit next to the sources inside the submodule, so the exported
# include directory has to use the same "pbl/src/src" prefix as the source
# list above.
target_include_directories(pbl
	PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/pbl/src/src
)

# NOTE(review): PBLTEST is exported PUBLIC, so every target linking against
# pbl is compiled with it as well - confirm this is intended.
target_compile_definitions(pbl
	PUBLIC PBLTEST
)

if (PBL_WITH_TESTS)
	enable_testing()

	# Each entry maps to the source file pbl/src/src/pbl<name>.c and to an
	# executable called pbl<name>.
	set(pbl_tests
		httst
		kftst
		iftst
		kfblockprint
		ListTest
		SetTest
		Perform
		MapTest
		PriorityQueueTest
	)

	foreach (test_name ${pbl_tests})
		add_executable(pbl${test_name} pbl/src/src/pbl${test_name}.c)
		target_link_libraries(pbl${test_name} PRIVATE pbl)
		add_test(NAME ${test_name} COMMAND pbl${test_name})
	endforeach()
endif()
|
1
lib/pbl/pbl
Submodule
1
lib/pbl/pbl
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 362f7e598dc38aba3c195e10830259f728531e83
|
|
@ -29,4 +29,16 @@
|
|||
#define ACTION_PREFIX "@bare_name@-"
|
||||
#define DB_OWNER_NAME "@DB_OWNER_NAME@"
|
||||
|
||||
#if defined(NDEBUG)
|
||||
# define CMAKE_CXX_FLAGS "@CMAKE_CXX_FLAGS_RELEASE@"
|
||||
# define CMAKE_C_FLAGS "@CMAKE_C_FLAGS_RELEASE@"
|
||||
#else
|
||||
# define CMAKE_CXX_FLAGS "@CMAKE_CXX_FLAGS_DEBUG@"
|
||||
# define CMAKE_C_FLAGS "@CMAKE_C_FLAGS_DEBUG@"
|
||||
#endif
|
||||
#define CMAKE_SYSTEM "@CMAKE_SYSTEM@"
|
||||
#define CMAKE_CXX_COMPILER "@CMAKE_CXX_COMPILER@"
|
||||
#define CMAKE_C_COMPILER "@CMAKE_C_COMPILER@"
|
||||
#define CMAKE_VERSION "@CMAKE_VERSION@"
|
||||
|
||||
#endif
|
||||
|
|
|
@ -6,14 +6,17 @@ add_executable(${PROJECT_NAME}
|
|||
main.c
|
||||
findactions.c
|
||||
builtin_feats.c
|
||||
damerau_levenshtein.c
|
||||
)
|
||||
|
||||
target_include_directories(${PROJECT_NAME}
|
||||
PRIVATE ${CMAKE_SOURCE_DIR}/include
|
||||
PRIVATE ${CMAKE_SOURCE_DIR}/lib/pbl/pbl/src/src
|
||||
)
|
||||
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
PRIVATE ${bare_name}-inc
|
||||
PRIVATE pbl
|
||||
)
|
||||
|
||||
if (DINDEXER_WITH_MEDIA_AUTODETECT)
|
||||
|
|
|
@ -15,24 +15,12 @@
|
|||
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "builtin_feats.h"
|
||||
#include <stdio.h>
|
||||
#include "dindexerConfig.h"
|
||||
|
||||
void print_builtin_feats() {
|
||||
#if VERSION_BETA
|
||||
char beta_str[2] = "b";
|
||||
#else
|
||||
char beta_str[1] = "";
|
||||
#endif
|
||||
|
||||
printf("%s v %d.%d.%d%s\nRev %s\n",
|
||||
PROGRAM_NAME,
|
||||
VERSION_MAJOR,
|
||||
VERSION_MINOR,
|
||||
VERSION_PATCH,
|
||||
beta_str,
|
||||
VERSION_GIT
|
||||
);
|
||||
print_version();
|
||||
|
||||
printf("CONFIG_FILE_PATH = \"%s\"\n", CONFIG_FILE_PATH);
|
||||
printf("ACTIONS_SEARCH_PATH = \"%s\"\n", ACTIONS_SEARCH_PATH);
|
||||
|
@ -47,5 +35,26 @@ void print_builtin_feats() {
|
|||
#else
|
||||
printf("NDEBUG = no (Debug build)\n");
|
||||
#endif
|
||||
printf("Built on %s (CMake %s)\n", CMAKE_SYSTEM, CMAKE_VERSION);
|
||||
printf("Build flags:\n");
|
||||
printf(" %s %s\n", CMAKE_CXX_COMPILER, CMAKE_CXX_FLAGS);
|
||||
printf(" %s %s\n", CMAKE_C_COMPILER, CMAKE_C_FLAGS);
|
||||
printf("DB_OWNER_NAME = \"%s\"\n", DB_OWNER_NAME);
|
||||
}
|
||||
|
||||
void print_version() {
|
||||
#if VERSION_BETA
|
||||
char beta_str[2] = "b";
|
||||
#else
|
||||
char beta_str[1] = "";
|
||||
#endif
|
||||
|
||||
printf("%s v%d.%d.%d%s\nRev %s\n",
|
||||
PROGRAM_NAME,
|
||||
VERSION_MAJOR,
|
||||
VERSION_MINOR,
|
||||
VERSION_PATCH,
|
||||
beta_str,
|
||||
VERSION_GIT
|
||||
);
|
||||
}
|
||||
|
|
|
@ -19,5 +19,6 @@
|
|||
#define id17B851C76AD54C8B9A2098323FC83038
|
||||
|
||||
void print_builtin_feats ( void );
|
||||
void print_version ( void );
|
||||
|
||||
#endif
|
||||
|
|
248
src/main/damerau_levenshtein.c
Normal file
248
src/main/damerau_levenshtein.c
Normal file
|
@ -0,0 +1,248 @@
|
|||
/* Copyright (c) 2012 Kevin L. Stern
|
||||
* Copyright (c) 2016 Michele Santullo
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* See
|
||||
* http://software-and-algorithms.blogspot.ca/2012/09/damerau-levenshtein-edit-distance.html
|
||||
* and
|
||||
* https://github.com/KevinStern/software-and-algorithms/blob/master/src/main/java/blogspot/software_and_algorithms/stern_library/string/DamerauLevenshteinAlgorithm.java
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* The Damerau-Levenshtein Algorithm is an extension to the Levenshtein
|
||||
* Algorithm which solves the edit distance problem between a source string and
|
||||
* a target string with the following operations:
|
||||
*
|
||||
* <ul>
|
||||
* <li>Character Insertion</li>
|
||||
* <li>Character Deletion</li>
|
||||
* <li>Character Replacement</li>
|
||||
* <li>Adjacent Character Swap</li>
|
||||
* </ul>
|
||||
*
|
||||
* Note that the adjacent character swap operation is an edit that may be
|
||||
* applied when two adjacent characters in the source string match two adjacent
|
||||
* characters in the target string, but in reverse order, rather than a general
|
||||
* allowance for adjacent character swaps.
|
||||
* <p>
|
||||
*
|
||||
* This implementation allows the client to specify the costs of the various
|
||||
* edit operations with the restriction that the cost of two swap operations
|
||||
* must not be less than the cost of a delete operation followed by an insert
|
||||
* operation. This restriction is required to preclude two swaps involving the
|
||||
* same character being required for optimality which, in turn, enables a fast
|
||||
* dynamic programming solution.
|
||||
* <p>
|
||||
*
|
||||
* The running time of the Damerau-Levenshtein algorithm is O(n*m) where n is
|
||||
* the length of the source string and m is the length of the target string.
|
||||
* This implementation consumes O(n*m) space.
|
||||
*
|
||||
* @author Kevin L. Stern
|
||||
*/
|
||||
|
||||
#include "damerau_levenshtein.h"
|
||||
#include "pbl_wrapper.h"
|
||||
#include <string.h>
|
||||
#include <iso646.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
|
||||
/* Type-generic min()/max() built on GNU statement expressions: each argument
 * is copied into a local first, so it is evaluated exactly once.
 * See: http://stackoverflow.com/questions/3437404/min-and-max-in-c */
#define min(a,b) \
	({ __typeof__ (a) min_lhs_ = (a); \
	   __typeof__ (b) min_rhs_ = (b); \
	   (min_lhs_ < min_rhs_) ? min_lhs_ : min_rhs_; })
#define max(a,b) \
	({ __typeof__ (a) max_lhs_ = (a); \
	   __typeof__ (b) max_rhs_ = (b); \
	   (max_lhs_ > max_rhs_) ? max_lhs_ : max_rhs_; })
|
||||
|
||||
typedef wchar_t Character;
|
||||
|
||||
/* Store parValue under parKey in parMap. Both key and value are copied into
 * the map by size; a negative result from pblMapAdd trips the assertion. */
static void insert_pair (PblMap* parMap, Character parKey, int parValue) {
	int add_status;

	add_status = pblMapAdd(parMap, &parKey, sizeof(parKey), &parValue, sizeof(parValue));
	assert(add_status >= 0);
	(void)add_status; /* only inspected by the assertion */
}
|
||||
|
||||
/* Look up parKey in parMap and return the int stored for it, or -1 when the
 * key is not present. */
static int get_value (PblMap* parMap, Character parKey) {
	size_t stored_size;
	const int* const found = (const int*)pblMapGet(parMap, &parKey, sizeof(parKey), &stored_size);

	if (not found)
		return -1;

	/* values are always inserted as plain ints */
	assert(sizeof(int) == stored_size);
	return *found;
}
|
||||
|
||||
/* Convenience overload for NUL-terminated strings: measures both inputs with
 * strlen() and forwards everything to damerau_levenshtein_with_size(). */
int damerau_levenshtein (
	const char* parSource,
	const char* parTarget,
	int parDeleteCost,
	int parInsertCost,
	int parReplaceCost,
	int parSwapCost
)
{
	const size_t source_len = strlen(parSource);
	const size_t target_len = strlen(parTarget);

	return damerau_levenshtein_with_size(
		parSource, source_len,
		parTarget, target_len,
		parDeleteCost, parInsertCost, parReplaceCost, parSwapCost
	);
}
|
||||
|
||||
/**
|
||||
* Compute the Damerau-Levenshtein distance between the specified source
|
||||
* string and the specified target string.
|
||||
*/
|
||||
int damerau_levenshtein_with_size (
|
||||
const char* parSource,
|
||||
size_t parSourceLen,
|
||||
const char* parTarget,
|
||||
size_t parTargetLen,
|
||||
int parDeleteCost,
|
||||
int parInsertCost,
|
||||
int parReplaceCost,
|
||||
int parSwapCost
|
||||
)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
int* table;
|
||||
PblMap* sourceIndexByCharacter;
|
||||
int delete_distance;
|
||||
int insert_distance;
|
||||
int match_distance;
|
||||
int swap_distance;
|
||||
int maxSourceLetterMatchIndex;
|
||||
int candidateSwapIndex;
|
||||
int i_swap;
|
||||
int j_swap;
|
||||
int pre_swap_cost;
|
||||
int retval;
|
||||
|
||||
assert(parSource);
|
||||
assert(parTarget);
|
||||
|
||||
/*
|
||||
* Required to facilitate the premise to the algorithm that two swaps of the
|
||||
* same character are never required for optimality.
|
||||
*/
|
||||
if (2 * parSwapCost < parInsertCost + parDeleteCost) {
|
||||
/*throw new IllegalArgumentException("Unsupported cost assignment");*/
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (0 == parSourceLen)
|
||||
return parTargetLen * parInsertCost;
|
||||
if (0 == parTargetLen)
|
||||
return parSourceLen * parDeleteCost;
|
||||
|
||||
const int table_length = parSourceLen * parTargetLen;
|
||||
table = (int*)malloc(sizeof(int) * table_length);
|
||||
memset(table, 0, sizeof(int) * table_length);
|
||||
|
||||
sourceIndexByCharacter = pblMapNewHashMap();
|
||||
assert(sourceIndexByCharacter);
|
||||
|
||||
if (parSource[0] != parTarget[0]) {
|
||||
table[0 /*source*/ + 0 /*target*/ * parSourceLen] =
|
||||
min(parReplaceCost, parDeleteCost + parInsertCost);
|
||||
}
|
||||
insert_pair(sourceIndexByCharacter, parSource[0], 0);
|
||||
|
||||
for (i = 1; i < parSourceLen; ++i) {
|
||||
delete_distance = table[i - 1 + 0 * parSourceLen];
|
||||
insert_distance = (i + 1) * parDeleteCost + parInsertCost;
|
||||
match_distance = i * parDeleteCost +
|
||||
(parSource[i] == parTarget[i] ? 0 : parReplaceCost);
|
||||
table[i + 0 * parSourceLen] = min(
|
||||
min(delete_distance, insert_distance), match_distance
|
||||
);
|
||||
}
|
||||
|
||||
for (j = 1; j < parTargetLen; ++j) {
|
||||
delete_distance = (j + 1) * parInsertCost + parDeleteCost;
|
||||
insert_distance = table[0 + (j - 1) * parSourceLen] + parInsertCost;
|
||||
match_distance = j * parInsertCost +
|
||||
(parSource[0] == parTarget[j] ? 0 : parReplaceCost);
|
||||
table[0 + j * parSourceLen] = min(
|
||||
min(delete_distance, insert_distance), match_distance
|
||||
);
|
||||
}
|
||||
|
||||
for (i = 1; i < parSourceLen; ++i) {
|
||||
maxSourceLetterMatchIndex = (parSource[i] == parTarget[0] ? 0 : -1);
|
||||
for (j = 1; j < parTargetLen; ++j) {
|
||||
candidateSwapIndex =
|
||||
get_value(sourceIndexByCharacter, parTarget[j]);
|
||||
j_swap = maxSourceLetterMatchIndex;
|
||||
delete_distance = table[(i - 1) + j * parSourceLen] + parDeleteCost;
|
||||
insert_distance = table[i + (j - 1) * parSourceLen] + parInsertCost;
|
||||
match_distance = table[(i - 1) + (j - 1) * parSourceLen];
|
||||
if (parSource[i] != parTarget[j])
|
||||
match_distance += parReplaceCost;
|
||||
else
|
||||
maxSourceLetterMatchIndex = j;
|
||||
|
||||
if (-1 != candidateSwapIndex and -1 != j_swap) {
|
||||
i_swap = candidateSwapIndex;
|
||||
if (0 == i_swap and 0 == j_swap)
|
||||
pre_swap_cost = 0;
|
||||
else
|
||||
pre_swap_cost = table[
|
||||
max(0, i_swap - 1) + max(0, j_swap - 1) * parSourceLen
|
||||
];
|
||||
swap_distance = pre_swap_cost + (i - i_swap - 1) *
|
||||
parDeleteCost + (j - j_swap - 1) * parInsertCost +
|
||||
parSwapCost;
|
||||
}
|
||||
else {
|
||||
swap_distance = INT_MAX;
|
||||
}
|
||||
table[i + j * parSourceLen] = min(
|
||||
min(
|
||||
min(delete_distance, insert_distance),
|
||||
match_distance
|
||||
),
|
||||
swap_distance
|
||||
);
|
||||
}
|
||||
insert_pair(sourceIndexByCharacter, parSource[i], i);
|
||||
}
|
||||
|
||||
retval = table[(parSourceLen - 1) + (parTargetLen - 1) * parSourceLen];
|
||||
free(table);
|
||||
pblMapFree(sourceIndexByCharacter);
|
||||
return retval;
|
||||
}
|
44
src/main/damerau_levenshtein.h
Normal file
44
src/main/damerau_levenshtein.h
Normal file
|
@ -0,0 +1,44 @@
|
|||
/* Copyright 2015, 2016, Michele Santullo
|
||||
* This file is part of "dindexer".
|
||||
*
|
||||
* "dindexer" is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* "dindexer" is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id88738025C6B24BDEB604A5AE3C36EE8D
|
||||
#define id88738025C6B24BDEB604A5AE3C36EE8D
|
||||
|
||||
#include "helpers/compatibility.h"
|
||||
#include "stddef.h"
|
||||
|
||||
int damerau_levenshtein (
|
||||
const char* parSource,
|
||||
const char* parTarget,
|
||||
int parDeleteCost,
|
||||
int parInsertCost,
|
||||
int parReplaceCost,
|
||||
int parSwapCost
|
||||
) a_pure;
|
||||
|
||||
int damerau_levenshtein_with_size (
|
||||
const char* parSource,
|
||||
size_t parSourceLen,
|
||||
const char* parTarget,
|
||||
size_t parTargetLen,
|
||||
int parDeleteCost,
|
||||
int parInsertCost,
|
||||
int parReplaceCost,
|
||||
int parSwapCost
|
||||
) a_pure;
|
||||
|
||||
#endif
|
|
@ -21,12 +21,29 @@
|
|||
#include "findactions.h"
|
||||
#include "helpers/lengthof.h"
|
||||
#include "builtin_feats.h"
|
||||
#include "damerau_levenshtein.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <iso646.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <getopt.h>
|
||||
#include <errno.h>
|
||||
|
||||
/* This program can be run either with the name of a subcommand as its first
|
||||
* parameter (eg: dindexer locate), plus some optional parameters that are just
|
||||
* passed on to the subcommand, or with options that are really meant for this
|
||||
* program itself, and no subcommand is invoked.
|
||||
* In the first case the program won't try to parse any parameters except for
|
||||
* the first (the subcommand name), which is removed from the parameters list.
|
||||
* The appropriate command is then invoked and all the remaining parameters are
|
||||
* passed to it. No further action is taken by this program, which in fact
|
||||
* terminates right after the subcommand is invoked.
|
||||
* In the second case, the program won't try to invoke any other command. It will
|
||||
* try to parse the command line itself and behave on itself. Passing an action
|
||||
* name in this case is wrong. For example, the command "dindexer --version scan"
|
||||
* is wrong and should be rejected.
|
||||
*/
|
||||
|
||||
struct PrintContext {
|
||||
FILE* stream;
|
||||
|
@ -39,6 +56,7 @@ static size_t foreach_avail_action ( int(*parFunc)(const char*, const void*), ch
|
|||
static int printf_stream ( const char* parMsg, const void* parStream );
|
||||
static int printf_stream_inplace ( const char* parMsg, const void* parPrintContext );
|
||||
static int same_action ( const char* parAction1, const void* parAction2 );
|
||||
static int find_similar ( const char* parAction, const void* parUserInput );
|
||||
static void print_usage ( void );
|
||||
static int manage_commandline ( int parArgc, char* parArgv[], char** parActions, size_t parActionCount, int* parShouldQuit );
|
||||
|
||||
|
@ -65,16 +83,24 @@ int main (int parArgc, char* parArgv[]) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
|
||||
if (optind < parArgc)
|
||||
specified_action = parArgv[optind];
|
||||
if (1 < parArgc)
|
||||
specified_action = parArgv[1];
|
||||
else
|
||||
specified_action = "";
|
||||
selected_action = foreach_avail_action(&same_action, actions, actions_count, specified_action);
|
||||
|
||||
if (actions_count == selected_action) {
|
||||
fprintf(stderr, "Unrecognized action \"%s\" - available actions are:\n", specified_action);
|
||||
foreach_avail_action(&printf_stream, actions, actions_count, stderr);
|
||||
//Find a possible mispelling and show a hint to the user if any
|
||||
selected_action = foreach_avail_action(&find_similar, actions, actions_count, specified_action);
|
||||
if (selected_action < actions_count) {
|
||||
fprintf(stderr, "Unrecognized action \"%s\" - maybe you meant \"%s\"?\n",
|
||||
specified_action,
|
||||
get_actionname(actions[selected_action])
|
||||
);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Unrecognized action \"%s\"\n", specified_action);
|
||||
}
|
||||
free_actions(actions, actions_count);
|
||||
return 2;
|
||||
}
|
||||
|
@ -99,12 +125,18 @@ int main (int parArgc, char* parArgv[]) {
|
|||
|
||||
argv = (char**)malloc(sizeof(char*) * (parArgc - 1 + 1));
|
||||
argv[0] = action_path;
|
||||
for (z = 2; z <= parArgc; ++z) {
|
||||
argv[z - 1] = specified_action;
|
||||
for (z = 2; z < parArgc; ++z) {
|
||||
argv[z - 1] = parArgv[z];
|
||||
}
|
||||
argv[parArgc - 1] = NULL;
|
||||
|
||||
/*printf("would call %s\n", action_path);*/
|
||||
execv(action_path, argv);
|
||||
retval = execv(action_path, argv);
|
||||
if (retval < 0) {
|
||||
fprintf(stderr, "Error executing \"%s\": %d:\n%s\n", action_path, errno, strerror(errno));
|
||||
free(action_path);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* the program won't get here, but just to be clean... */
|
||||
free(action_path);
|
||||
|
@ -156,35 +188,53 @@ static int same_action (const char* parAction1, const void* parAction2) {
|
|||
}
|
||||
}
|
||||
|
||||
/* Callback for foreach_avail_action(): returns 1 when parAction is within an
 * edit distance of 2 from the text the user typed (parUserInput, a C string),
 * 0 otherwise. All edit operations cost 1. */
static int find_similar (const char* parAction, const void* parUserInput) {
	const int distance = damerau_levenshtein((const char*)parUserInput, parAction, 1, 1, 1, 1);

	/* a negative distance is the error return of damerau_levenshtein() and
	 * must not be mistaken for a very close match */
	if (0 <= distance and distance <= 2)
		return 1;
	else
		return 0;
}
|
||||
|
||||
static void print_usage() {
|
||||
printf("--help, -h - show this help\n");
|
||||
printf("--builtin, -b - show build info\n");
|
||||
printf("--printactions=[prefix] - print a complete-friendly list of available commands, filtered by an optional prefix\n");
|
||||
printf("--version, -v - show %s's version and quit", PROGRAM_NAME);
|
||||
}
|
||||
|
||||
static int manage_commandline (int parArgc, char* parArgv[], char** parActions, size_t parActionCount, int* parShouldQuit) {
|
||||
int showbuiltin;
|
||||
int showhelp;
|
||||
int showactions_for_completion;
|
||||
int showversion;
|
||||
int option_index;
|
||||
int getopt_retval;
|
||||
FILE* streamout;
|
||||
int retval;
|
||||
struct PrintContext actions_print_context;
|
||||
|
||||
/*Check if the program should just forward the invocation to some
|
||||
subcommand*/
|
||||
if (2 <= parArgc and parArgv[1][0] != '\0' and parArgv[1][0] != '-') {
|
||||
*parShouldQuit = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct option opts[] = {
|
||||
{ "printactions", optional_argument, NULL, 'a' },
|
||||
{ "builtin", no_argument, &showbuiltin, 1 },
|
||||
{ "help", no_argument, &showhelp, 1 },
|
||||
{ "version", no_argument, &showversion, 1 },
|
||||
{ 0, 0, 0, 0 }
|
||||
};
|
||||
|
||||
memset(&actions_print_context, 0, sizeof(actions_print_context));
|
||||
option_index = 0;
|
||||
showbuiltin = showhelp = showactions_for_completion = 0;
|
||||
showversion = showbuiltin = showhelp = showactions_for_completion = 0;
|
||||
*parShouldQuit = 0;
|
||||
|
||||
while (0 <= (getopt_retval = getopt_long(parArgc, parArgv, "bh", opts, &option_index))) {
|
||||
while (0 <= (getopt_retval = getopt_long(parArgc, parArgv, "bhv", opts, &option_index))) {
|
||||
switch (getopt_retval) {
|
||||
case 'h':
|
||||
showhelp = 1;
|
||||
|
@ -198,10 +248,20 @@ static int manage_commandline (int parArgc, char* parArgv[], char** parActions,
|
|||
case 'a':
|
||||
showactions_for_completion = 1;
|
||||
actions_print_context.prefix_filter = (optarg ? optarg : "");
|
||||
break;
|
||||
case 'v':
|
||||
showversion = 1;
|
||||
break;
|
||||
}
|
||||
option_index = 0;
|
||||
}
|
||||
|
||||
if (optind != parArgc) {
|
||||
fprintf(stderr, "Invalid command line - unexpected \"%s\"\n", parArgv[optind]);
|
||||
*parShouldQuit = 1;
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (parArgc < 2 or showhelp) {
|
||||
*parShouldQuit = 1;
|
||||
if (parArgc < 2) {
|
||||
|
@ -215,6 +275,7 @@ static int manage_commandline (int parArgc, char* parArgv[], char** parActions,
|
|||
streamout = stdout;
|
||||
retval = 0;
|
||||
}
|
||||
fprintf(streamout, "\n");
|
||||
fprintf(streamout, "Available actions are:\n");
|
||||
foreach_avail_action(&printf_stream, parActions, parActionCount, streamout);
|
||||
return retval;
|
||||
|
@ -224,6 +285,11 @@ static int manage_commandline (int parArgc, char* parArgv[], char** parActions,
|
|||
print_builtin_feats();
|
||||
return 0;
|
||||
}
|
||||
else if (showversion) {
|
||||
*parShouldQuit = 1;
|
||||
print_version();
|
||||
return 0;
|
||||
}
|
||||
else if (showactions_for_completion) {
|
||||
*parShouldQuit = 1;
|
||||
actions_print_context.stream = stdout;
|
||||
|
|
25
src/main/pbl_wrapper.h
Normal file
25
src/main/pbl_wrapper.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* Copyright 2015, 2016, Michele Santullo
|
||||
* This file is part of "dindexer".
|
||||
*
|
||||
* "dindexer" is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* "dindexer" is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef idDB955D222C4A4AF5B9BD1A4A0BBDD9E3
|
||||
#define idDB955D222C4A4AF5B9BD1A4A0BBDD9E3
|
||||
|
||||
#include <stdio.h>
|
||||
#include <memory.h>
|
||||
#include "pbl.h"
|
||||
|
||||
#endif
|
|
@ -16,14 +16,53 @@
|
|||
*/
|
||||
|
||||
#include "linereader.hpp"
|
||||
#include "listdircontent.hpp"
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <readline/readline.h>
|
||||
#include <readline/history.h>
|
||||
#include <cassert>
|
||||
#include <ciso646>
|
||||
|
||||
namespace din {
|
||||
LineReader::LineReader() {
|
||||
namespace {
|
||||
//Completion generator stub: on the first call for a given word
//(parState == 0) it resets its bookkeeping, and it always returns nullptr,
//so no completion candidate is ever produced.
char* custom_generator (const char* parText, int parState) {
	static int s_list_index;
	static int s_text_len;

	if (0 == parState) {
		s_text_len = std::strlen(parText);
		s_list_index = 0;
	}
	return nullptr;
}
|
||||
|
||||
//char* custom_generator (const char* parText, int parState) {
|
||||
//}
|
||||
|
||||
//char** custom_completion (const char* parText, int parStart, int parEnd) {
|
||||
// char** matches = nullptr;
|
||||
|
||||
// if (0 == parStart) {
|
||||
// matches = rl_completion_matches(const_cast<char*>(parText), &custom_generator);
|
||||
// }
|
||||
// else {
|
||||
// //See the hack described here:
|
||||
// //http://cc.byexamples.com/2008/06/16/gnu-readline-implement-custom-auto-complete/
|
||||
// rl_bind_key('\t', &rl_abort);
|
||||
// }
|
||||
// return matches;
|
||||
//}
|
||||
} //unnamed namespace
|
||||
|
||||
//Keeps a pointer to the directory lister (asserted to be non-null) and
//installs custom_generator as readline's completion entry function, with
//the tab key bound to rl_complete.
LineReader::LineReader (const ListDirContent* parLS) :
	m_ls(parLS)
{
	assert(nullptr != m_ls);

	rl_completion_entry_function = custom_generator;
	rl_bind_key('\t', rl_complete);
}
|
||||
|
||||
std::string LineReader::read (const std::string& parMessage) {
|
||||
|
|
|
@ -21,12 +21,17 @@
|
|||
#include <string>
|
||||
|
||||
namespace din {
|
||||
class ListDirContent;
|
||||
|
||||
class LineReader {
|
||||
public:
|
||||
LineReader ( void );
|
||||
explicit LineReader ( const ListDirContent* parLS );
|
||||
~LineReader ( void ) noexcept = default;
|
||||
|
||||
std::string read ( const std::string& parMessage );
|
||||
|
||||
private:
|
||||
const ListDirContent* m_ls;
|
||||
};
|
||||
} //namespace din
|
||||
|
||||
|
|
|
@ -83,13 +83,13 @@ namespace {
|
|||
|
||||
void do_navigation (din::DBSource& parDB) {
|
||||
const std::string prompt;
|
||||
din::LineReader lines;
|
||||
din::ListDirContent ls(&parDB);
|
||||
din::LineReader lines(&ls);
|
||||
|
||||
bool running = true;
|
||||
std::string curr_line;
|
||||
din::CommandProcessor proc;
|
||||
din::GenericPath dir_man;
|
||||
din::ListDirContent ls(&parDB);
|
||||
proc.add_command("exit", &on_exit, 0);
|
||||
proc.add_command("cd", std::function<void(const std::string&)>(std::bind(&din::GenericPath::push_piece, &dir_man, std::placeholders::_1)), 1);
|
||||
proc.add_command("disconnect", std::function<void()>(std::bind(&din::DBSource::disconnect, &parDB)), 0);
|
||||
|
|
Loading…
Add table
Reference in a new issue