// #move to substring matching algorithms #include // memchr #include // _mm256 AVX2 // #TODO: rename these routines, add routines for 3B and 4B string matching. // For >= 5B, use BMH or something like it. // Reference: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html bool memchr2c_avx2 (u8*p, u8 a, u8 b, u16 len) { // For 2-byte substring search ("ab" in "abacus") if (len < 2) return false; __m256i va = _mm256_set1_epi8((char)a); __m256i vb = _mm256_set1_epi8((char)b); /**AVX-512BW __mmask64 k = _mm512_cmpeq_epi8_mask(v0, va) & _mm512_cmpeq_epi8_mask(v1, vb); if (k) return p + _tzcnt_u64(k); */ u16 i = 0; // load 33 bytes total via two overlapping loads for (; i + 33 <= len; i += 32) { __m256i v0 = _mm256_loadu_si256((__m256i*)(p + i)); __m256i v1 = _mm256_loadu_si256((__m256i*)(p + i + 1)); __m256i ca = _mm256_cmpeq_epi8(v0, va); __m256i cb = _mm256_cmpeq_epi8(v1, vb); u32 mask = _mm256_movemask_epi8(_mm256_and_si256(ca, cb)); if (mask) return true; //p + i + _tzcnt_u32(mask); } // scalar tail for (; i + 1 < len; i++) if (p[i] == a && p[i+1] == b) return true; return false; } bool memchr2_avx2 (u8*p, u8 a, u8 b, u16 len) { // For two non-consecutive chars a and b anywhere in target string. __m256i va = _mm256_set1_epi8((char)a); __m256i vb = _mm256_set1_epi8((char)b); /** AVX-512BW: __mmask64 k = _mm512_cmpeq_epi8_mask(v, va) | _mm512_cmpeq_epi8_mask(v, vb); if (k) return p + _tzcnt_u64(k);*/ u16 i = 0; for (; i + 32 <= len; i += 32) { __m256i v = _mm256_loadu_si256((__m256i*)(p + i)); __m256i ca = _mm256_cmpeq_epi8(v, va); __m256i cb = _mm256_cmpeq_epi8(v, vb); __m256i m = _mm256_or_si256(ca, cb); u32 mask = _mm256_movemask_epi8(m); if (mask) { return true; } } for (; i < len; i += 1) { if (p[i] == a || p[i] == b) return true; } return false; } bool memchr_avx2 (u8* p, u8 c, u16 len) { // Broadcast 8-bit integer a to all elements of dst. (vpbroadcastb) __m256i vneedle = _mm256_set1_epi8((char)c); /** AVX-512BW: __m512i v = _mm512_loadu_si512(p); __mmask64 k = _mm512_cmpeq_epi8_mask(v, vneedle); if (k) return p + _tzcnt_u64(k); */ u16 i = 0; for (; i + 32 <= len; i += 32) { // Load 256-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary. __m256i v = _mm256_loadu_si256((__m256i*)(p + i)); // Compare packed 8-bit integers in a and b for equality, and store the results in dst. __m256i cmp = _mm256_cmpeq_epi8(v, vneedle); // Create mask from the most significant bit of each 8-bit element in a, and store the result in dst. u32 mask = _mm256_movemask_epi8(cmp); if (mask) { return true; } } // scalar tail: for (; i < len; i += 1) { if (p[i] == c) return true; } return false; } bool equal_nocase (string a, string b) { if (a.count != b.count) return false; for_each(i, a) { if (to_lower_ascii(a[i]) != to_lower_ascii(b[i])) { return false; } } return true; } s64 find_index_from_left_no_case (string s, string substring, s64 start_index = 0) { if (!substring) return -1; for (s64 i = start_index; i < (s.count - substring.count + 1); i += 1) { string t = string_view(s, i, substring.count); if (equal_nocase(t, substring)) return i; } return -1; } // Fuzzy_Filter implementation copied from focus-editor // source: https://github.com/focus-editor/focus struct Fuzzy_Filter { string full_string; struct Chunk { string str; ArrayView chars; }; ArrayView chunks; }; void fuzzy_filter_free (Fuzzy_Filter* ff) { string_free(ff->full_string); array_free(ff->chunks); zero_struct(ff); } Fuzzy_Filter construct_fuzzy_filter (string filter_string, bool multi_chunk_search=false) { push_allocator(temp()); string s = copy_string(trim(filter_string)); if (!s) return {}; Array chunks; chunks.allocator = temp(); ArrayView strings; if (multi_chunk_search) { u8 space = ' '; replace_chars(s, "\\/", space); // < THIS DOESN'T WORK FOR FILESYSTEM SEARCHES! strings = string_split(s, space); } else { strings = ArrayView(1); strings[0] = s; } for_each(i, strings) { string current = strings[i]; if (!current) continue; Array chars; chars.allocator = temp(); array_reserve(chars, strings.count); u8* t = current.data; while (t < current.data + current.count) { string substring; substring.data = t; t = unicode_next_character(t); substring.count = t - substring.data; array_add(chars, substring); } array_add(chunks, {s, chars}); } Fuzzy_Filter ff = {}; ff.full_string = s; ff.chunks = chunks; return ff; } ArrayView fuzzy_match (string str, Fuzzy_Filter filter, bool exact_match_only, s32* score, bool* exact_match) { ArrayView highlights = ArrayView(str.count); if (!is_valid(filter.chunks)) { (*score) = 0; (*exact_match) = false; return highlights; } // #TODO: Document the scoring system more clearly. constexpr s32 MAX_CHARS = 256; { // Try matching the full string first and rank it highest s64 index = find_index_from_left_no_case(str, filter.full_string, 0); if (index >= 0) { memset(highlights.data + index, 1, filter.full_string.count); (*score) = (MAX_CHARS + filter.full_string.count) * (MAX_CHARS - index); (*exact_match) = true; return highlights; } } auto_release_temp(); push_allocator(temp()); s32 final_score = 0; ArrayView chunk_highlights = ArrayView(str.count, false); // Try matching each chunk exactly and accept the match if at least one matches for_each(c, filter.chunks) { Fuzzy_Filter::Chunk chunk = filter.chunks[c]; s32 chunk_score = 0; memset(chunk_highlights.data, 0, chunk_highlights.count); s64 index = find_index_from_left_no_case(str, chunk.str, 0); if (index >= 0) { memset(chunk_highlights.data + index, 1, chunk.str.count); chunk_score += (MAX_CHARS/2 + chunk.str.count) * (MAX_CHARS - index) - str.count; } if (chunk_score >= 0) { final_score += chunk_score; for_each(ch, chunk_highlights) { highlights[ch] |= chunk_highlights[ch]; } } } if (final_score > 0) { (*score) = final_score; (*exact_match) = true; // if we had at least one exact match don't proceed return highlights; } if (final_score <= 0 && exact_match_only) { (*score) = final_score; (*exact_match) = false; return highlights; } // Now match individual characters: for_each(c, filter.chunks) { Fuzzy_Filter::Chunk chunk = filter.chunks[c]; s32 pos = 0; s32 chunk_score = 0; memset(chunk_highlights.data, 0, chunk_highlights.count); // Try matching the full chunk first: for_each(i, chunk.chars) { string character = chunk.chars[i]; s64 index = find_index_from_left_no_case(str, character, pos); if (index < 0) { chunk_score = -1; break; } chunk_highlights[index] = true; chunk_score += 10 * (MAX_CHARS - index); // The closer to the beginning the better pos = index + character.count; } if (chunk_score >= 0) { final_score += chunk_score; // apply chunk highlights for_each(ch, chunk_highlights) { highlights[ch] |= chunk_highlights[ch]; } } } (*score) = final_score; (*exact_match) = false; return highlights; }