diff options
Diffstat (limited to 'util/compress/libdeflate/lib/adler32_vec_template.h')
-rw-r--r-- | util/compress/libdeflate/lib/adler32_vec_template.h | 124 |
1 files changed, 0 insertions, 124 deletions
diff --git a/util/compress/libdeflate/lib/adler32_vec_template.h b/util/compress/libdeflate/lib/adler32_vec_template.h deleted file mode 100644 index 4eb8c2a82..000000000 --- a/util/compress/libdeflate/lib/adler32_vec_template.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * adler32_vec_template.h - template for vectorized Adler-32 implementations - * - * Copyright 2016 Eric Biggers - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * This file contains a template for vectorized Adler-32 implementations. - * - * The inner loop between reductions modulo 65521 of an unvectorized Adler-32 - * implementation looks something like this: - * - * do { - * s1 += *p; - * s2 += s1; - * } while (++p != chunk_end); - * - * For vectorized calculation of s1, we only need to sum the input bytes. They - * can be accumulated into multiple counters which are eventually summed - * together. 
- * - * For vectorized calculation of s2, the basic idea is that for each iteration - * that processes N bytes, we can perform the following vectorizable - * calculation: - * - * s2 += N*byte_1 + (N-1)*byte_2 + (N-2)*byte_3 + ... + 1*byte_N - * - * Or, equivalently, we can sum the byte_1...byte_N for each iteration into N - * separate counters, then do the multiplications by N...1 just once at the end - * rather than once per iteration. - * - * Also, we must account for how previous bytes will affect s2 by doing the - * following at the beginning of each iteration: - * - * s2 += s1 * N - * - * Furthermore, like s1, "s2" can actually be multiple counters which are - * eventually summed together. - */ - /* - * FUNCNAME() - continue an Adler-32 checksum over the size bytes at p. - * - * adler: checksum so far; its low 16 bits seed s1 and the bits above seed s2. - * p, size: the input buffer and its length in bytes. - * Returns the updated checksum, packed as (s2 << 16) | s1. - * - * The work is done in three phases: a scalar loop until p is a multiple of - * IMPL_ALIGNMENT, a loop that hands whole chunks to FUNCNAME_CHUNK, and a - * scalar loop over whatever is left. - * - * FUNCNAME, FUNCNAME_CHUNK, ATTRIBUTES, IMPL_ALIGNMENT, IMPL_SEGMENT_SIZE and - * IMPL_MAX_CHUNK_SIZE are not defined in this file and are undefined again - * after the function, so the including file is expected to supply them. - * MIN, STATIC_ASSERT, MAX_CHUNK_SIZE and DIVISOR are also defined elsewhere. - * NOTE(review): DIVISOR is presumably the 65521 mentioned in the comment - * above; confirm against the including file. - */ -static u32 ATTRIBUTES -FUNCNAME(u32 adler, const u8 *p, size_t size) -{ - u32 s1 = adler & 0xFFFF; /* low 16 bits of the incoming checksum */ - u32 s2 = adler >> 16; /* remaining high bits of the incoming checksum */ - const u8 * const end = p + size; /* one past the last input byte */ - const u8 *vend; /* where the chunk loop stops; computed once p is aligned */ - const size_t max_chunk_size = - MIN(MAX_CHUNK_SIZE, IMPL_MAX_CHUNK_SIZE) - - (MIN(MAX_CHUNK_SIZE, IMPL_MAX_CHUNK_SIZE) % - IMPL_SEGMENT_SIZE); /* smaller of the two limits, rounded down to a multiple of IMPL_SEGMENT_SIZE */ - - /* Process a byte at a time until the needed alignment is reached */ - if (p != end && (uintptr_t)p % IMPL_ALIGNMENT) { - do { - s1 += *p++; - s2 += s1; - } while (p != end && (uintptr_t)p % IMPL_ALIGNMENT); - s1 %= DIVISOR; /* reduce both sums before entering the chunk loop */ - s2 %= DIVISOR; - } - - /* - * Process "chunks" of bytes using vector instructions. Chunk sizes are - * limited to MAX_CHUNK_SIZE, which guarantees that s1 and s2 never - * overflow before being reduced modulo DIVISOR. For vector processing, - * chunk sizes are also made evenly divisible by IMPL_SEGMENT_SIZE and - * may be further limited to IMPL_MAX_CHUNK_SIZE. 
- */ - STATIC_ASSERT(IMPL_SEGMENT_SIZE % IMPL_ALIGNMENT == 0); - vend = end - ((size_t)(end - p) % IMPL_SEGMENT_SIZE); /* bytes past the last whole segment are left for the scalar tail loop */ - while (p != vend) { - size_t chunk_size = MIN((size_t)(vend - p), max_chunk_size); /* both operands are multiples of IMPL_SEGMENT_SIZE, so chunk_size is too */ - - s2 += s1 * chunk_size; /* the value s1 has before this chunk is counted once per byte of the chunk */ - - FUNCNAME_CHUNK((const void *)p, (const void *)(p + chunk_size), - &s1, &s2); /* NOTE(review): presumably adds the contribution of the bytes in the given range to s1 and s2; its definition is not visible here */ - - p += chunk_size; - s1 %= DIVISOR; /* reduce after every chunk, as the overflow bound above requires */ - s2 %= DIVISOR; - } - - /* Process any remaining bytes */ - if (p != end) { - do { - s1 += *p++; - s2 += s1; - } while (p != end); - s1 %= DIVISOR; - s2 %= DIVISOR; - } - - return (s2 << 16) | s1; /* s2 goes above bit 16, s1 stays in the low 16 bits */ -} - /* NOTE(review): the macros below are cleared here, presumably so that this template can be included again with a different configuration; confirm against the including files */ -#undef FUNCNAME -#undef FUNCNAME_CHUNK -#undef ATTRIBUTES -#undef IMPL_ALIGNMENT -#undef IMPL_SEGMENT_SIZE -#undef IMPL_MAX_CHUNK_SIZE |