Musa-STL-Cpp/lib/Base/String_Matching.cpp

266 lines
7.6 KiB
C++

// #move to substring matching algorithms
#include <memory.h> // memchr
#include <immintrin.h> // _mm256 AVX2
// #TODO: rename these routines, add routines for 3B and 4B string matching.
// For >= 5B, use BMH or something like it.
// Reference: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html
bool memchr2c_avx2 (u8*p, u8 a, u8 b, u16 len) {
// For 2-byte substring search ("ab" in "abacus")
if (len < 2) return false;
__m256i va = _mm256_set1_epi8((char)a);
__m256i vb = _mm256_set1_epi8((char)b);
/**AVX-512BW __mmask64 k = _mm512_cmpeq_epi8_mask(v0, va) & _mm512_cmpeq_epi8_mask(v1, vb); if (k) return p + _tzcnt_u64(k); */
u16 i = 0;
// load 33 bytes total via two overlapping loads
for (; i + 33 <= len; i += 32) {
__m256i v0 = _mm256_loadu_si256((__m256i*)(p + i));
__m256i v1 = _mm256_loadu_si256((__m256i*)(p + i + 1));
__m256i ca = _mm256_cmpeq_epi8(v0, va);
__m256i cb = _mm256_cmpeq_epi8(v1, vb);
u32 mask = _mm256_movemask_epi8(_mm256_and_si256(ca, cb));
if (mask) return true; //p + i + _tzcnt_u32(mask);
}
// scalar tail
for (; i + 1 < len; i++)
if (p[i] == a && p[i+1] == b)
return true;
return false;
}
bool memchr2_avx2 (u8*p, u8 a, u8 b, u16 len) {
// For two non-consecutive chars a and b anywhere in target string.
__m256i va = _mm256_set1_epi8((char)a);
__m256i vb = _mm256_set1_epi8((char)b);
/** AVX-512BW: __mmask64 k = _mm512_cmpeq_epi8_mask(v, va) | _mm512_cmpeq_epi8_mask(v, vb); if (k) return p + _tzcnt_u64(k);*/
u16 i = 0;
for (; i + 32 <= len; i += 32) {
__m256i v = _mm256_loadu_si256((__m256i*)(p + i));
__m256i ca = _mm256_cmpeq_epi8(v, va);
__m256i cb = _mm256_cmpeq_epi8(v, vb);
__m256i m = _mm256_or_si256(ca, cb);
u32 mask = _mm256_movemask_epi8(m);
if (mask) { return true; }
}
for (; i < len; i += 1) {
if (p[i] == a || p[i] == b) return true;
}
return false;
}
bool memchr_avx2 (u8* p, u8 c, u16 len) {
// Broadcast 8-bit integer a to all elements of dst. (vpbroadcastb)
__m256i vneedle = _mm256_set1_epi8((char)c);
/** AVX-512BW: __m512i v = _mm512_loadu_si512(p); __mmask64 k = _mm512_cmpeq_epi8_mask(v, vneedle); if (k) return p + _tzcnt_u64(k); */
u16 i = 0;
for (; i + 32 <= len; i += 32) {
// Load 256-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
__m256i v = _mm256_loadu_si256((__m256i*)(p + i));
// Compare packed 8-bit integers in a and b for equality, and store the results in dst.
__m256i cmp = _mm256_cmpeq_epi8(v, vneedle);
// Create mask from the most significant bit of each 8-bit element in a, and store the result in dst.
u32 mask = _mm256_movemask_epi8(cmp);
if (mask) {
return true;
}
}
// scalar tail:
for (; i < len; i += 1) {
if (p[i] == c) return true;
}
return false;
}
bool equal_nocase (string a, string b) {
if (a.count != b.count) return false;
for_each(i, a) {
if (to_lower_ascii(a[i]) != to_lower_ascii(b[i])) {
return false;
}
}
return true;
}
s64 find_index_from_left_no_case (string s, string substring, s64 start_index = 0) {
if (!substring) return -1;
for (s64 i = start_index; i < (s.count - substring.count + 1); i += 1) {
string t = string_view(s, i, substring.count);
if (equal_nocase(t, substring)) return i;
}
return -1;
}
// Fuzzy_Filter implementation copied from focus-editor
// source: https://github.com/focus-editor/focus
struct Fuzzy_Filter {
string full_string;
struct Chunk {
string str;
ArrayView<string> chars;
};
ArrayView<Chunk> chunks;
};
void fuzzy_filter_free (Fuzzy_Filter* ff) {
string_free(ff->full_string);
array_free(ff->chunks);
zero_struct(ff);
}
Fuzzy_Filter construct_fuzzy_filter (string filter_string, bool multi_chunk_search=false) {
push_allocator(temp());
string s = copy_string(trim(filter_string));
if (!s) return {};
Array<Fuzzy_Filter::Chunk> chunks;
chunks.allocator = temp();
ArrayView<string> strings;
if (multi_chunk_search) {
u8 space = ' ';
replace_chars(s, "\\/", space); // < THIS DOESN'T WORK FOR FILESYSTEM SEARCHES!
strings = string_split(s, space);
} else {
strings = ArrayView<string>(1);
strings[0] = s;
}
for_each(i, strings) {
string current = strings[i];
if (!current) continue;
Array<string> chars;
chars.allocator = temp();
array_reserve(chars, strings.count);
u8* t = current.data;
while (t < current.data + current.count) {
string substring;
substring.data = t;
t = unicode_next_character(t);
substring.count = t - substring.data;
array_add(chars, substring);
}
array_add(chunks, {s, chars});
}
Fuzzy_Filter ff = {};
ff.full_string = s;
ff.chunks = chunks;
return ff;
}
ArrayView<bool> fuzzy_match (string str, Fuzzy_Filter filter, bool exact_match_only,
s32* score, bool* exact_match) {
ArrayView<bool> highlights = ArrayView<bool>(str.count);
if (!is_valid(filter.chunks)) {
(*score) = 0;
(*exact_match) = false;
return highlights;
}
// #TODO: Document the scoring system more clearly.
constexpr s32 MAX_CHARS = 256;
{ // Try matching the full string first and rank it highest
s64 index = find_index_from_left_no_case(str, filter.full_string, 0);
if (index >= 0) {
memset(highlights.data + index, 1, filter.full_string.count);
(*score) = (MAX_CHARS + filter.full_string.count) * (MAX_CHARS - index);
(*exact_match) = true;
return highlights;
}
}
auto_release_temp();
push_allocator(temp());
s32 final_score = 0;
ArrayView<bool> chunk_highlights = ArrayView<bool>(str.count, false);
// Try matching each chunk exactly and accept the match if at least one matches
for_each(c, filter.chunks) {
Fuzzy_Filter::Chunk chunk = filter.chunks[c];
s32 chunk_score = 0;
memset(chunk_highlights.data, 0, chunk_highlights.count);
s64 index = find_index_from_left_no_case(str, chunk.str, 0);
if (index >= 0) {
memset(chunk_highlights.data + index, 1, chunk.str.count);
chunk_score += (MAX_CHARS/2 + chunk.str.count) * (MAX_CHARS - index) - str.count;
}
if (chunk_score >= 0) {
final_score += chunk_score;
for_each(ch, chunk_highlights) {
highlights[ch] |= chunk_highlights[ch];
}
}
}
if (final_score > 0) {
(*score) = final_score;
(*exact_match) = true; // if we had at least one exact match don't proceed
return highlights;
}
if (final_score <= 0 && exact_match_only) {
(*score) = final_score;
(*exact_match) = false;
return highlights;
}
// Now match individual characters:
for_each(c, filter.chunks) {
Fuzzy_Filter::Chunk chunk = filter.chunks[c];
s32 pos = 0;
s32 chunk_score = 0;
memset(chunk_highlights.data, 0, chunk_highlights.count);
// Try matching the full chunk first:
for_each(i, chunk.chars) {
string character = chunk.chars[i];
s64 index = find_index_from_left_no_case(str, character, pos);
if (index < 0) {
chunk_score = -1;
break;
}
chunk_highlights[index] = true;
chunk_score += 10 * (MAX_CHARS - index); // The closer to the beginning the better
pos = index + character.count;
}
if (chunk_score >= 0) {
final_score += chunk_score;
// apply chunk highlights
for_each(ch, chunk_highlights) {
highlights[ch] |= chunk_highlights[ch];
}
}
}
(*score) = final_score;
(*exact_match) = false;
return highlights;
}