266 lines
7.6 KiB
C++
266 lines
7.6 KiB
C++
// #move to substring matching algorithms
|
|
#include <memory.h> // memchr
|
|
#include <immintrin.h> // _mm256 AVX2
|
|
|
|
// #TODO: rename these routines, add routines for 3B and 4B string matching.
|
|
// For >= 5B, use BMH or something like it.
|
|
|
|
// Reference: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html
|
|
bool memchr2c_avx2 (u8*p, u8 a, u8 b, u16 len) {
|
|
// For 2-byte substring search ("ab" in "abacus")
|
|
if (len < 2) return false;
|
|
__m256i va = _mm256_set1_epi8((char)a);
|
|
__m256i vb = _mm256_set1_epi8((char)b);
|
|
/**AVX-512BW __mmask64 k = _mm512_cmpeq_epi8_mask(v0, va) & _mm512_cmpeq_epi8_mask(v1, vb); if (k) return p + _tzcnt_u64(k); */
|
|
u16 i = 0;
|
|
// load 33 bytes total via two overlapping loads
|
|
for (; i + 33 <= len; i += 32) {
|
|
__m256i v0 = _mm256_loadu_si256((__m256i*)(p + i));
|
|
__m256i v1 = _mm256_loadu_si256((__m256i*)(p + i + 1));
|
|
|
|
__m256i ca = _mm256_cmpeq_epi8(v0, va);
|
|
__m256i cb = _mm256_cmpeq_epi8(v1, vb);
|
|
|
|
u32 mask = _mm256_movemask_epi8(_mm256_and_si256(ca, cb));
|
|
|
|
if (mask) return true; //p + i + _tzcnt_u32(mask);
|
|
}
|
|
|
|
// scalar tail
|
|
for (; i + 1 < len; i++)
|
|
if (p[i] == a && p[i+1] == b)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
bool memchr2_avx2 (u8*p, u8 a, u8 b, u16 len) {
|
|
// For two non-consecutive chars a and b anywhere in target string.
|
|
__m256i va = _mm256_set1_epi8((char)a);
|
|
__m256i vb = _mm256_set1_epi8((char)b);
|
|
/** AVX-512BW: __mmask64 k = _mm512_cmpeq_epi8_mask(v, va) | _mm512_cmpeq_epi8_mask(v, vb); if (k) return p + _tzcnt_u64(k);*/
|
|
u16 i = 0;
|
|
for (; i + 32 <= len; i += 32) {
|
|
__m256i v = _mm256_loadu_si256((__m256i*)(p + i));
|
|
|
|
__m256i ca = _mm256_cmpeq_epi8(v, va);
|
|
__m256i cb = _mm256_cmpeq_epi8(v, vb);
|
|
__m256i m = _mm256_or_si256(ca, cb);
|
|
|
|
u32 mask = _mm256_movemask_epi8(m);
|
|
|
|
if (mask) { return true; }
|
|
}
|
|
|
|
for (; i < len; i += 1) {
|
|
if (p[i] == a || p[i] == b) return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool memchr_avx2 (u8* p, u8 c, u16 len) {
|
|
// Broadcast 8-bit integer a to all elements of dst. (vpbroadcastb)
|
|
__m256i vneedle = _mm256_set1_epi8((char)c);
|
|
/** AVX-512BW: __m512i v = _mm512_loadu_si512(p); __mmask64 k = _mm512_cmpeq_epi8_mask(v, vneedle); if (k) return p + _tzcnt_u64(k); */
|
|
u16 i = 0;
|
|
for (; i + 32 <= len; i += 32) {
|
|
// Load 256-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
|
|
__m256i v = _mm256_loadu_si256((__m256i*)(p + i));
|
|
// Compare packed 8-bit integers in a and b for equality, and store the results in dst.
|
|
__m256i cmp = _mm256_cmpeq_epi8(v, vneedle);
|
|
// Create mask from the most significant bit of each 8-bit element in a, and store the result in dst.
|
|
u32 mask = _mm256_movemask_epi8(cmp);
|
|
|
|
if (mask) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// scalar tail:
|
|
for (; i < len; i += 1) {
|
|
if (p[i] == c) return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool equal_nocase (string a, string b) {
|
|
if (a.count != b.count) return false;
|
|
for_each(i, a) {
|
|
if (to_lower_ascii(a[i]) != to_lower_ascii(b[i])) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
s64 find_index_from_left_no_case (string s, string substring, s64 start_index = 0) {
|
|
if (!substring) return -1;
|
|
|
|
for (s64 i = start_index; i < (s.count - substring.count + 1); i += 1) {
|
|
string t = string_view(s, i, substring.count);
|
|
if (equal_nocase(t, substring)) return i;
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
// Fuzzy_Filter implementation copied from focus-editor
|
|
// source: https://github.com/focus-editor/focus
|
|
struct Fuzzy_Filter {
|
|
string full_string;
|
|
struct Chunk {
|
|
string str;
|
|
ArrayView<string> chars;
|
|
};
|
|
|
|
ArrayView<Chunk> chunks;
|
|
};
|
|
|
|
void fuzzy_filter_free (Fuzzy_Filter* ff) {
|
|
string_free(ff->full_string);
|
|
array_free(ff->chunks);
|
|
zero_struct(ff);
|
|
}
|
|
|
|
Fuzzy_Filter construct_fuzzy_filter (string filter_string, bool multi_chunk_search=false) {
|
|
push_allocator(temp());
|
|
string s = copy_string(trim(filter_string));
|
|
if (!s) return {};
|
|
|
|
Array<Fuzzy_Filter::Chunk> chunks;
|
|
chunks.allocator = temp();
|
|
|
|
ArrayView<string> strings;
|
|
if (multi_chunk_search) {
|
|
u8 space = ' ';
|
|
replace_chars(s, "\\/", space); // < THIS DOESN'T WORK FOR FILESYSTEM SEARCHES!
|
|
strings = string_split(s, space);
|
|
} else {
|
|
strings = ArrayView<string>(1);
|
|
strings[0] = s;
|
|
}
|
|
|
|
for_each(i, strings) {
|
|
string current = strings[i];
|
|
if (!current) continue;
|
|
|
|
Array<string> chars;
|
|
chars.allocator = temp();
|
|
|
|
array_reserve(chars, strings.count);
|
|
u8* t = current.data;
|
|
while (t < current.data + current.count) {
|
|
string substring;
|
|
substring.data = t;
|
|
t = unicode_next_character(t);
|
|
substring.count = t - substring.data;
|
|
|
|
array_add(chars, substring);
|
|
}
|
|
|
|
array_add(chunks, {s, chars});
|
|
}
|
|
|
|
Fuzzy_Filter ff = {};
|
|
ff.full_string = s;
|
|
ff.chunks = chunks;
|
|
|
|
return ff;
|
|
}
|
|
|
|
ArrayView<bool> fuzzy_match (string str, Fuzzy_Filter filter, bool exact_match_only,
|
|
s32* score, bool* exact_match) {
|
|
ArrayView<bool> highlights = ArrayView<bool>(str.count);
|
|
if (!is_valid(filter.chunks)) {
|
|
(*score) = 0;
|
|
(*exact_match) = false;
|
|
return highlights;
|
|
}
|
|
|
|
// #TODO: Document the scoring system more clearly.
|
|
constexpr s32 MAX_CHARS = 256;
|
|
{ // Try matching the full string first and rank it highest
|
|
s64 index = find_index_from_left_no_case(str, filter.full_string, 0);
|
|
if (index >= 0) {
|
|
memset(highlights.data + index, 1, filter.full_string.count);
|
|
(*score) = (MAX_CHARS + filter.full_string.count) * (MAX_CHARS - index);
|
|
(*exact_match) = true;
|
|
return highlights;
|
|
}
|
|
}
|
|
|
|
auto_release_temp();
|
|
push_allocator(temp());
|
|
s32 final_score = 0;
|
|
ArrayView<bool> chunk_highlights = ArrayView<bool>(str.count, false);
|
|
|
|
// Try matching each chunk exactly and accept the match if at least one matches
|
|
for_each(c, filter.chunks) {
|
|
Fuzzy_Filter::Chunk chunk = filter.chunks[c];
|
|
s32 chunk_score = 0;
|
|
memset(chunk_highlights.data, 0, chunk_highlights.count);
|
|
|
|
s64 index = find_index_from_left_no_case(str, chunk.str, 0);
|
|
if (index >= 0) {
|
|
memset(chunk_highlights.data + index, 1, chunk.str.count);
|
|
chunk_score += (MAX_CHARS/2 + chunk.str.count) * (MAX_CHARS - index) - str.count;
|
|
}
|
|
|
|
if (chunk_score >= 0) {
|
|
final_score += chunk_score;
|
|
for_each(ch, chunk_highlights) {
|
|
highlights[ch] |= chunk_highlights[ch];
|
|
}
|
|
}
|
|
}
|
|
|
|
if (final_score > 0) {
|
|
(*score) = final_score;
|
|
(*exact_match) = true; // if we had at least one exact match don't proceed
|
|
return highlights;
|
|
}
|
|
|
|
if (final_score <= 0 && exact_match_only) {
|
|
(*score) = final_score;
|
|
(*exact_match) = false;
|
|
return highlights;
|
|
}
|
|
|
|
// Now match individual characters:
|
|
for_each(c, filter.chunks) {
|
|
Fuzzy_Filter::Chunk chunk = filter.chunks[c];
|
|
s32 pos = 0;
|
|
s32 chunk_score = 0;
|
|
memset(chunk_highlights.data, 0, chunk_highlights.count);
|
|
|
|
// Try matching the full chunk first:
|
|
for_each(i, chunk.chars) {
|
|
string character = chunk.chars[i];
|
|
s64 index = find_index_from_left_no_case(str, character, pos);
|
|
if (index < 0) {
|
|
chunk_score = -1;
|
|
break;
|
|
}
|
|
chunk_highlights[index] = true;
|
|
chunk_score += 10 * (MAX_CHARS - index); // The closer to the beginning the better
|
|
|
|
pos = index + character.count;
|
|
}
|
|
|
|
if (chunk_score >= 0) {
|
|
final_score += chunk_score;
|
|
// apply chunk highlights
|
|
for_each(ch, chunk_highlights) {
|
|
highlights[ch] |= chunk_highlights[ch];
|
|
}
|
|
}
|
|
}
|
|
|
|
(*score) = final_score;
|
|
(*exact_match) = false;
|
|
return highlights;
|
|
}
|