1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-25 00:53:43 +00:00

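Bugfixes and improvements: compute string end pointers with strlen in damerau_levenshtein_with_size, and fix utf8_advance (single-byte mask 0x00 -> 0x7f, parenthesised continuation-byte check, reordered loop condition)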

This commit is contained in:
King_DuckZ 2016-04-21 00:20:00 +02:00
parent 653371763b
commit e3e704c50f
2 changed files with 11 additions and 9 deletions

View file

@ -153,6 +153,8 @@ int damerau_levenshtein_with_size (
const char* target;
const char* source_index_1;
const char* target_index_1;
const char* source_end = parSource + strlen(parSource);
const char* target_end = parTarget + strlen(parTarget);
Character source_char;
Character target_char;
Character source_char_0;
@ -184,8 +186,8 @@ int damerau_levenshtein_with_size (
source = parSource;
target = parTarget;
source_char_0 = utf8_advance(&source, parSource + parSourceLen);
target_char_0 = utf8_advance(&target, parTarget + parTargetLen);
source_char_0 = utf8_advance(&source, source_end);
target_char_0 = utf8_advance(&target, target_end);
source_index_1 = source;
target_index_1 = target;
if (source_char_0 != target_char_0) {
@ -196,7 +198,7 @@ int damerau_levenshtein_with_size (
assert(source = source_index_1);
for (i = 1; i < parSourceLen; ++i) {
source_char = utf8_advance(&source, parSource + parSourceLen);
source_char = utf8_advance(&source, source_end);
delete_distance = table[i - 1 + 0 * parSourceLen] + parDeleteCost;
insert_distance = (i + 1) * parDeleteCost + parInsertCost;
match_distance = i * parDeleteCost +
@ -208,7 +210,7 @@ int damerau_levenshtein_with_size (
assert(target == target_index_1);
for (j = 1; j < parTargetLen; ++j) {
target_char = utf8_advance(&target, parTarget + parTargetLen);
target_char = utf8_advance(&target, target_end);
delete_distance = (j + 1) * parInsertCost + parDeleteCost;
insert_distance = table[0 + (j - 1) * parSourceLen] + parInsertCost;
match_distance = j * parInsertCost +
@ -220,11 +222,11 @@ int damerau_levenshtein_with_size (
source = source_index_1;
for (i = 1; i < parSourceLen; ++i) {
source_char = utf8_advance(&source, parSource + parSourceLen);
source_char = utf8_advance(&source, source_end);
maxSourceLetterMatchIndex = (source_char == target_char_0 ? 0 : -1);
target = target_index_1;
for (j = 1; j < parTargetLen; ++j) {
target_char = utf8_advance(&target, parTarget + parTargetLen);
target_char = utf8_advance(&target, target_end);
candidateSwapIndex = get_value(sourceIndexByCharacter, target_char);
j_swap = maxSourceLetterMatchIndex;
delete_distance = table[(i - 1) + j * parSourceLen] + parDeleteCost;

View file

@ -60,7 +60,7 @@ ascii:
}
Character utf8_advance (const char** parString, const char* parStringEnd) {
const Character masks[6] = {0x00, 0x1f, 0x0f, 0x07, 0x03, 0x03};
const Character masks[6] = {0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x03};
Character retval;
uint8_t curr_code;
int seq_len;
@ -79,9 +79,9 @@ Character utf8_advance (const char** parString, const char* parStringEnd) {
}
retval = curr_code bitand masks[seq_len - 1];
for (z = 0; z < (seq_len - 1) and ++(*parString) < parStringEnd; ++z) {
for (z = 0; ++(*parString) < parStringEnd and z < (seq_len - 1); ++z) {
curr_code = **parString;
if (curr_code bitand 0xc0 != 0x80)
if ((curr_code bitand 0xc0) != 0x80)
return 0;
retval = (retval << 6) bitor (curr_code bitand 0x3f);
}