summaryrefslogtreecommitdiff
path: root/util/compress/libdeflate/scripts/checksum_benchmarks.sh
blob: 23b5984eb03e183792e880555d93e93c51c59bed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/bin/bash

set -eu -o pipefail

# Check whether the CPU advertises a given feature.
#
# Arguments:
#   $1 - feature name exactly as it appears in the cpuinfo listing
#        (e.g. "avx2", "pmull"); it is matched as a whole word
#   $2 - optional path of a cpuinfo-format file to search instead of
#        /proc/cpuinfo (useful for captured dumps)
# Globals:
#   ARCH (read) - selects the cpuinfo line that lists features: "Features"
#                 for arm*/aarch*, "flags" for every other value
# Returns:
#   grep's exit status: 0 if a matching line exists, non-zero otherwise
have_cpu_feature() {
	local feature="$1"
	local cpuinfo="${2:-/proc/cpuinfo}"
	local tag
	case $ARCH in
	arm*|aarch*)
		tag="Features"
		;;
	*)
		tag="flags"
		;;
	esac
	# $'[ \t]' yields a bracket expression containing a literal space and
	# tab; \< and \> anchor the feature name on word boundaries so that,
	# e.g., "avx" does not match "avx2".
	grep -q "^$tag"$'[ \t]'"*:.*\<$feature\>" "$cpuinfo"
}

# Build the benchmark binaries and run the checksum self-tests.
#
# Arguments:
#   $@ - extra arguments placed first on the make command line
#        (callers pass CFLAGS=...)
# Returns:
#   the exit status of ./test_checksums
make_and_test() {
	# TEST_SUPPORT__DO_NOT_USE=1 is the special test support flag that
	# enables LIBDEFLATE_DISABLE_CPU_FEATURES in the built programs.
	local -a make_args=("$@" TEST_SUPPORT__DO_NOT_USE=1 checksum test_checksums)

	# Build output is discarded so that stdout carries only result lines.
	make "${make_args[@]}" > /dev/null

	# The self-tests are a sanity check only; they are not part of the
	# benchmarking itself.
	./test_checksums > /dev/null
}

# Run ./checksum once and print one formatted result line.
#
# Arguments:
#   $1    - implementation label shown in parentheses after the checksum name
#   $2... - extra flags passed to ./checksum after CKSUM_FLAGS
# Globals:
#   CKSUM_FLAGS (read) - flags selecting the checksum type
#   CKSUM_NAME  (read) - checksum name used in the printed label
#   FILE        (read) - data file handed to ./checksum via -t
# Outputs:
#   "<CKSUM_NAME> (<label>)" left-justified in 45 columns, followed by the
#   number extracted from the "<N> MB/s" text in ./checksum's output
__do_benchmark() {
	local label="$1"
	local mbps
	local -a extra_args=("${@:2}")

	# First isolate the "<digits> MB/s" token, then keep only the digits.
	mbps=$(./checksum "${CKSUM_FLAGS[@]}" "${extra_args[@]}" -t "$FILE" \
		| grep -o '[0-9]\+ MB/s' \
		| grep -o '[0-9]\+')
	printf "%-45s%-10s\n" "$CKSUM_NAME ($label)" "$mbps"
}

# Build and benchmark one implementation of the current checksum.
#
# Arguments:
#   $1 - implementation label.  The special value "zlib" runs ./checksum
#        with -Z and prints the label as-is; any other value is reported
#        as "libdeflate, <label>".
# Globals:
#   EXTRA_CFLAGS (read) - extra compiler flags, joined with spaces into the
#                         CFLAGS value given to make
#   ARCH (read)         - on x86_64, libdeflate implementations are also
#                         built with -m32 and benchmarked a second time
do_benchmark() {
	local impl="$1"

	# Always start from a fresh native build.  Without this, the zlib run
	# would reuse whatever ./checksum the previous call left behind, which
	# on x86_64 is the -m32 build, so the "zlib" row would silently be
	# measured on a 32-bit binary.
	make_and_test CFLAGS="${EXTRA_CFLAGS[*]}"
	if [ "$impl" = zlib ]; then
		__do_benchmark "$impl" "-Z"
	else
		__do_benchmark "libdeflate, $impl"
		if [ "$ARCH" = x86_64 ]; then
			make_and_test CFLAGS="-m32 ${EXTRA_CFLAGS[*]}"
			__do_benchmark "libdeflate, $impl, 32-bit"
		fi
	fi
}

# Filter: reorder result lines from stdin by their last field (the speed),
# highest first, and write the lines to stdout otherwise unchanged.
sort_by_speed() {
	# Decorate each line with its last field as a sort key, sort on that
	# key numerically in descending order, then strip the key again.
	awk '{ print $NF, $0 }' \
		| sort -n -r \
		| sed 's/^[^ ]* //'
}

# Record that a CPU feature is to be disabled for subsequent benchmarks.
#
# Arguments:
#   $1    - feature name appended (with a leading comma) to
#           LIBDEFLATE_DISABLE_CPU_FEATURES
#   $2... - compiler flags appended to EXTRA_CFLAGS
# Globals:
#   LIBDEFLATE_DISABLE_CPU_FEATURES (modified), EXTRA_CFLAGS (modified)
disable_cpu_feature() {
	LIBDEFLATE_DISABLE_CPU_FEATURES+=",$1"
	# Everything after the feature name is a compiler flag.
	EXTRA_CFLAGS+=("${@:2}")
}

# EXIT handler: delete the test data file, but only when this script
# generated it itself (USING_TMPFILE is the command "true").
cleanup() {
	# USING_TMPFILE holds "true" or "false" and is executed as a command.
	$USING_TMPFILE || return 0
	rm "$FILE"
}

# Machine type as reported by uname; selects the cpuinfo tag and the set of
# implementations to benchmark.
ARCH=$(uname -m)
# Flipped to "true" only once this script has created its own data file.
USING_TMPFILE=false

# At most one argument (the data file) is accepted.
case $# in
0|1)
	;;
*)
	echo "Usage: $0 [FILE]" 1>&2
	exit 1
	;;
esac

# Installed before the temporary file is created so that the file is removed
# on every exit path from here on.
trap cleanup EXIT

if (( $# == 1 )); then
	# Benchmark the caller-supplied file; it is never deleted.
	FILE="$1"
else
	# Generate default test data file.
	FILE=$(mktemp -t checksum_testdata.XXXXXXXXXX)
	USING_TMPFILE=true
	echo "Generating 100 MB test file: $FILE"
	head -c 100000000 /dev/urandom > "$FILE"
fi

# Column header; the first column is 45 characters wide to line up with the
# "%-45s" format used for the result lines.
cat << EOF
Method                                       Speed (MB/s)
------                                       ------------
EOF

# CRC-32
#
# Each implementation is benchmarked and then its CPU feature is disabled,
# so that later runs presumably exercise the next implementation down.  The
# brace group is the left side of a pipeline, so it runs in a subshell: the
# changes disable_cpu_feature makes to EXTRA_CFLAGS and
# LIBDEFLATE_DISABLE_CPU_FEATURES stay confined to this section.
CKSUM_NAME="CRC-32"
CKSUM_FLAGS=()
EXTRA_CFLAGS=()
export LIBDEFLATE_DISABLE_CPU_FEATURES=""
{
case $ARCH in
i?86|x86_64)
	# i?86 rather than i386: on 32-bit x86 Linux `uname -m` usually
	# reports i686 (or i486/i586), which a literal "i386" never matches.
	if have_cpu_feature pclmulqdq && have_cpu_feature avx; then
		do_benchmark "PCLMUL/AVX"
		disable_cpu_feature "avx" "-mno-avx"
	fi
	if have_cpu_feature pclmulqdq; then
		do_benchmark "PCLMUL"
		disable_cpu_feature "pclmul" "-mno-pclmul"
	fi
	;;
arm*|aarch*)
	if have_cpu_feature crc32; then
		do_benchmark "ARM"
		disable_cpu_feature "crc32" "-march=armv8-a+nocrc"
	fi
	if have_cpu_feature pmull; then
		do_benchmark "PMULL"
		disable_cpu_feature "pmull" "-march=armv8-a+nocrc+nocrypto"
	fi
	;;
esac
do_benchmark "generic"
do_benchmark "zlib"
} | sort_by_speed

# Adler-32
#
# Same scheme as for the other checksum: benchmark an implementation, then
# disable its CPU feature before benchmarking the next one.  The brace group
# is the left side of a pipeline, so it runs in a subshell and the settings
# accumulated by disable_cpu_feature do not outlive this section.
CKSUM_NAME="Adler-32"
CKSUM_FLAGS=(-A)
EXTRA_CFLAGS=()
export LIBDEFLATE_DISABLE_CPU_FEATURES=""
echo
{
case $ARCH in
i?86|x86_64)
	# i?86 rather than i386: on 32-bit x86 Linux `uname -m` usually
	# reports i686 (or i486/i586), which a literal "i386" never matches.
	if have_cpu_feature avx512bw; then
		do_benchmark "AVX-512BW"
		disable_cpu_feature "avx512bw" "-mno-avx512bw"
	fi
	if have_cpu_feature avx2; then
		do_benchmark "AVX2"
		disable_cpu_feature "avx2" "-mno-avx2"
	fi
	if have_cpu_feature sse2; then
		do_benchmark "SSE2"
		disable_cpu_feature "sse2" "-mno-sse2"
	fi
	;;
arm*)
	if have_cpu_feature neon; then
		do_benchmark "NEON"
		disable_cpu_feature "neon" "-mfpu=vfpv3"
	fi
	;;
aarch*)
	# The cpuinfo feature checked here is "asimd", while the name passed
	# to disable_cpu_feature is still "neon".
	if have_cpu_feature asimd; then
		do_benchmark "NEON"
		disable_cpu_feature "neon" "-march=armv8-a+nosimd"
	fi
	;;
esac
do_benchmark "generic"
do_benchmark "zlib"
} | sort_by_speed