summaryrefslogtreecommitdiff
path: root/util/compress/libdeflate/lib/arm/adler32_impl.h
diff options
context:
space:
mode:
Diffstat (limited to 'util/compress/libdeflate/lib/arm/adler32_impl.h')
-rw-r--r--util/compress/libdeflate/lib/arm/adler32_impl.h125
1 files changed, 0 insertions, 125 deletions
diff --git a/util/compress/libdeflate/lib/arm/adler32_impl.h b/util/compress/libdeflate/lib/arm/adler32_impl.h
deleted file mode 100644
index 17e56c004..000000000
--- a/util/compress/libdeflate/lib/arm/adler32_impl.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * arm/adler32_impl.h - ARM implementations of Adler-32 checksum algorithm
- *
- * Copyright 2016 Eric Biggers
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef LIB_ARM_ADLER32_IMPL_H
-#define LIB_ARM_ADLER32_IMPL_H
-
-#include "cpu_features.h"
-
-/* NEON implementation */
-#undef DISPATCH_NEON
-#if !defined(DEFAULT_IMPL) && \
- (defined(__ARM_NEON) || (ARM_CPU_FEATURES_ENABLED && \
- COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS))
-# define FUNCNAME adler32_neon
-# define FUNCNAME_CHUNK adler32_neon_chunk
-# define IMPL_ALIGNMENT 16
-# define IMPL_SEGMENT_SIZE 32
-/* Prevent unsigned overflow of the 16-bit precision byte counters */
-# define IMPL_MAX_CHUNK_SIZE (32 * (0xFFFF / 0xFF))
-# ifdef __ARM_NEON
-# define ATTRIBUTES
-# define DEFAULT_IMPL adler32_neon
-# else
-# ifdef __arm__
-# define ATTRIBUTES __attribute__((target("fpu=neon")))
-# else
-# define ATTRIBUTES __attribute__((target("+simd")))
-# endif
-# define DISPATCH 1
-# define DISPATCH_NEON 1
-# endif
-# include <arm_neon.h>
-static forceinline ATTRIBUTES void
-adler32_neon_chunk(const uint8x16_t *p, const uint8x16_t * const end,
- u32 *s1, u32 *s2)
-{
- uint32x4_t v_s1 = (uint32x4_t) { 0, 0, 0, 0 };
- uint32x4_t v_s2 = (uint32x4_t) { 0, 0, 0, 0 };
- uint16x8_t v_byte_sums_a = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
- uint16x8_t v_byte_sums_b = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
- uint16x8_t v_byte_sums_c = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
- uint16x8_t v_byte_sums_d = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 };
-
- do {
- const uint8x16_t bytes1 = *p++;
- const uint8x16_t bytes2 = *p++;
- uint16x8_t tmp;
-
- v_s2 += v_s1;
-
- /* Vector Pairwise Add Long (u8 => u16) */
- tmp = vpaddlq_u8(bytes1);
-
- /* Vector Pairwise Add and Accumulate Long (u8 => u16) */
- tmp = vpadalq_u8(tmp, bytes2);
-
- /* Vector Pairwise Add and Accumulate Long (u16 => u32) */
- v_s1 = vpadalq_u16(v_s1, tmp);
-
- /* Vector Add Wide (u8 => u16) */
- v_byte_sums_a = vaddw_u8(v_byte_sums_a, vget_low_u8(bytes1));
- v_byte_sums_b = vaddw_u8(v_byte_sums_b, vget_high_u8(bytes1));
- v_byte_sums_c = vaddw_u8(v_byte_sums_c, vget_low_u8(bytes2));
- v_byte_sums_d = vaddw_u8(v_byte_sums_d, vget_high_u8(bytes2));
-
- } while (p != end);
-
- /* Vector Shift Left (u32) */
- v_s2 = vqshlq_n_u32(v_s2, 5);
-
- /* Vector Multiply Accumulate Long (u16 => u32) */
- v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_a), (uint16x4_t) { 32, 31, 30, 29 });
- v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_a), (uint16x4_t) { 28, 27, 26, 25 });
- v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_b), (uint16x4_t) { 24, 23, 22, 21 });
- v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_b), (uint16x4_t) { 20, 19, 18, 17 });
- v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_c), (uint16x4_t) { 16, 15, 14, 13 });
- v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_c), (uint16x4_t) { 12, 11, 10, 9 });
- v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_byte_sums_d), (uint16x4_t) { 8, 7, 6, 5 });
- v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_d), (uint16x4_t) { 4, 3, 2, 1 });
-
- *s1 += v_s1[0] + v_s1[1] + v_s1[2] + v_s1[3];
- *s2 += v_s2[0] + v_s2[1] + v_s2[2] + v_s2[3];
-}
-# include "../adler32_vec_template.h"
-#endif /* NEON implementation */
-
-#ifdef DISPATCH
-static inline adler32_func_t
-arch_select_adler32_func(void)
-{
- u32 features = get_cpu_features();
-
-#ifdef DISPATCH_NEON
- if (features & ARM_CPU_FEATURE_NEON)
- return adler32_neon;
-#endif
- return NULL;
-}
-#endif /* DISPATCH */
-
-#endif /* LIB_ARM_ADLER32_IMPL_H */