core/stdarch/crates/core_arch/src/x86/
avx512vnni.rs

1use crate::core_arch::{simd::*, x86::*};
2use crate::intrinsics::simd::*;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
8///
9/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssd_epi32&expand=2219)
10#[inline]
11#[target_feature(enable = "avx512vnni")]
12#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13#[cfg_attr(test, assert_instr(vpdpwssd))]
14pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
15    unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
16}
17
18/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19///
20/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssd_epi32&expand=2220)
21#[inline]
22#[target_feature(enable = "avx512vnni")]
23#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24#[cfg_attr(test, assert_instr(vpdpwssd))]
25pub fn _mm512_mask_dpwssd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26    unsafe {
27        let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
28        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29    }
30}
31
32/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
33///
34/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssd_epi32&expand=2221)
35#[inline]
36#[target_feature(enable = "avx512vnni")]
37#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38#[cfg_attr(test, assert_instr(vpdpwssd))]
39pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
40    unsafe {
41        let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
42        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
43    }
44}
45
46/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
47///
48/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_avx_epi32&expand=2713)
49#[inline]
50#[target_feature(enable = "avxvnni")]
51#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
52#[cfg_attr(
53    all(test, any(target_os = "linux", target_env = "msvc")),
54    assert_instr(vpdpwssd)
55)]
56pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
57    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
58}
59
60/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
61///
62/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssd_epi32&expand=2216)
63#[inline]
64#[target_feature(enable = "avx512vnni,avx512vl")]
65#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
66#[cfg_attr(test, assert_instr(vpdpwssd))]
67pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
68    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
69}
70
71/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
72///
73/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssd_epi32&expand=2217)
74#[inline]
75#[target_feature(enable = "avx512vnni,avx512vl")]
76#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
77#[cfg_attr(test, assert_instr(vpdpwssd))]
78pub fn _mm256_mask_dpwssd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
79    unsafe {
80        let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
81        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
82    }
83}
84
85/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
86///
87/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssd_epi32&expand=2218)
88#[inline]
89#[target_feature(enable = "avx512vnni,avx512vl")]
90#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
91#[cfg_attr(test, assert_instr(vpdpwssd))]
92pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
93    unsafe {
94        let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
95        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
96    }
97}
98
99/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
100///
101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_avx_epi32&expand=2712)
102#[inline]
103#[target_feature(enable = "avxvnni")]
104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
105#[cfg_attr(
106    all(test, any(target_os = "linux", target_env = "msvc")),
107    assert_instr(vpdpwssd)
108)]
109pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
110    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
111}
112
113/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
114///
115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssd_epi32&expand=2213)
116#[inline]
117#[target_feature(enable = "avx512vnni,avx512vl")]
118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
119#[cfg_attr(test, assert_instr(vpdpwssd))]
120pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
121    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
122}
123
124/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
125///
126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssd_epi32&expand=2214)
127#[inline]
128#[target_feature(enable = "avx512vnni,avx512vl")]
129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
130#[cfg_attr(test, assert_instr(vpdpwssd))]
131pub fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
132    unsafe {
133        let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
134        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
135    }
136}
137
138/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
139///
140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssd_epi32&expand=2215)
141#[inline]
142#[target_feature(enable = "avx512vnni,avx512vl")]
143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
144#[cfg_attr(test, assert_instr(vpdpwssd))]
145pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
146    unsafe {
147        let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
148        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
149    }
150}
151
152/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
153///
154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpwssds_epi32&expand=2228)
155#[inline]
156#[target_feature(enable = "avx512vnni")]
157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
158#[cfg_attr(test, assert_instr(vpdpwssds))]
159pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
160    unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
161}
162
163/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
164///
165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpwssds_epi32&expand=2229)
166#[inline]
167#[target_feature(enable = "avx512vnni")]
168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
169#[cfg_attr(test, assert_instr(vpdpwssds))]
170pub fn _mm512_mask_dpwssds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
171    unsafe {
172        let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
173        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
174    }
175}
176
177/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
178///
179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpwssds_epi32&expand=2230)
180#[inline]
181#[target_feature(enable = "avx512vnni")]
182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
183#[cfg_attr(test, assert_instr(vpdpwssds))]
184pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
185    unsafe {
186        let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
187        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
188    }
189}
190
191/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
192///
193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_avx_epi32&expand=2726)
194#[inline]
195#[target_feature(enable = "avxvnni")]
196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
197#[cfg_attr(
198    all(test, any(target_os = "linux", target_env = "msvc")),
199    assert_instr(vpdpwssds)
200)]
201pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
202    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
203}
204
205/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
206///
207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwssds_epi32&expand=2225)
208#[inline]
209#[target_feature(enable = "avx512vnni,avx512vl")]
210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
211#[cfg_attr(test, assert_instr(vpdpwssds))]
212pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
213    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
214}
215
216/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
217///
218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpwssds_epi32&expand=2226)
219#[inline]
220#[target_feature(enable = "avx512vnni,avx512vl")]
221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
222#[cfg_attr(test, assert_instr(vpdpwssds))]
223pub fn _mm256_mask_dpwssds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
224    unsafe {
225        let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
226        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
227    }
228}
229
230/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
231///
232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpwssds_epi32&expand=2227)
233#[inline]
234#[target_feature(enable = "avx512vnni,avx512vl")]
235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
236#[cfg_attr(test, assert_instr(vpdpwssds))]
237pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
238    unsafe {
239        let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
240        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
241    }
242}
243
244/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
245///
246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_avx_epi32&expand=2725)
247#[inline]
248#[target_feature(enable = "avxvnni")]
249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
250#[cfg_attr(
251    all(test, any(target_os = "linux", target_env = "msvc")),
252    assert_instr(vpdpwssds)
253)]
254pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
255    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
256}
257
258/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
259///
260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwssds_epi32&expand=2222)
261#[inline]
262#[target_feature(enable = "avx512vnni,avx512vl")]
263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
264#[cfg_attr(test, assert_instr(vpdpwssds))]
265pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
266    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
267}
268
269/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
270///
271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpwssds_epi32&expand=2223)
272#[inline]
273#[target_feature(enable = "avx512vnni,avx512vl")]
274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
275#[cfg_attr(test, assert_instr(vpdpwssds))]
276pub fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
277    unsafe {
278        let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
279        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
280    }
281}
282
283/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
284///
285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpwssds_epi32&expand=2224)
286#[inline]
287#[target_feature(enable = "avx512vnni,avx512vl")]
288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
289#[cfg_attr(test, assert_instr(vpdpwssds))]
290pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
291    unsafe {
292        let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
293        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
294    }
295}
296
297/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
298///
299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusd_epi32&expand=2201)
300#[inline]
301#[target_feature(enable = "avx512vnni")]
302#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
303#[cfg_attr(test, assert_instr(vpdpbusd))]
304pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
305    unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
306}
307
308/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
309///
310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusd_epi32&expand=2202)
311#[inline]
312#[target_feature(enable = "avx512vnni")]
313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
314#[cfg_attr(test, assert_instr(vpdpbusd))]
315pub fn _mm512_mask_dpbusd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
316    unsafe {
317        let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
318        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
319    }
320}
321
322/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
323///
324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusd_epi32&expand=2203)
325#[inline]
326#[target_feature(enable = "avx512vnni")]
327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
328#[cfg_attr(test, assert_instr(vpdpbusd))]
329pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
330    unsafe {
331        let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
332        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
333    }
334}
335
336/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
337///
338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_avx_epi32&expand=2683)
339#[inline]
340#[target_feature(enable = "avxvnni")]
341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
342#[cfg_attr(
343    all(test, any(target_os = "linux", target_env = "msvc")),
344    assert_instr(vpdpbusd)
345)]
346pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
347    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
348}
349
350/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
351///
352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusd_epi32&expand=2198)
353#[inline]
354#[target_feature(enable = "avx512vnni,avx512vl")]
355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
356#[cfg_attr(test, assert_instr(vpdpbusd))]
357pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
358    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
359}
360
361/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
362///
363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusd_epi32&expand=2199)
364#[inline]
365#[target_feature(enable = "avx512vnni,avx512vl")]
366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
367#[cfg_attr(test, assert_instr(vpdpbusd))]
368pub fn _mm256_mask_dpbusd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
369    unsafe {
370        let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
371        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
372    }
373}
374
375/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
376///
377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusd_epi32&expand=2200)
378#[inline]
379#[target_feature(enable = "avx512vnni,avx512vl")]
380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
381#[cfg_attr(test, assert_instr(vpdpbusd))]
382pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
383    unsafe {
384        let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
385        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
386    }
387}
388
389/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
390///
391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_avx_epi32&expand=2682)
392#[inline]
393#[target_feature(enable = "avxvnni")]
394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
395#[cfg_attr(
396    all(test, any(target_os = "linux", target_env = "msvc")),
397    assert_instr(vpdpbusd)
398)]
399pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
400    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
401}
402
403/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
404///
405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusd_epi32&expand=2195)
406#[inline]
407#[target_feature(enable = "avx512vnni,avx512vl")]
408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
409#[cfg_attr(test, assert_instr(vpdpbusd))]
410pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
411    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
412}
413
414/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
415///
416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusd_epi32&expand=2196)
417#[inline]
418#[target_feature(enable = "avx512vnni,avx512vl")]
419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
420#[cfg_attr(test, assert_instr(vpdpbusd))]
421pub fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
422    unsafe {
423        let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
424        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
425    }
426}
427
428/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
429///
430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusd_epi32&expand=2197)
431#[inline]
432#[target_feature(enable = "avx512vnni,avx512vl")]
433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
434#[cfg_attr(test, assert_instr(vpdpbusd))]
435pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
436    unsafe {
437        let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
438        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
439    }
440}
441
442/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
443///
444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dpbusds_epi32&expand=2210)
445#[inline]
446#[target_feature(enable = "avx512vnni")]
447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
448#[cfg_attr(test, assert_instr(vpdpbusds))]
449pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
450    unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
451}
452
453/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
454///
455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dpbusds_epi32&expand=2211)
456#[inline]
457#[target_feature(enable = "avx512vnni")]
458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
459#[cfg_attr(test, assert_instr(vpdpbusds))]
460pub fn _mm512_mask_dpbusds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
461    unsafe {
462        let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
463        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
464    }
465}
466
467/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
468///
469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dpbusds_epi32&expand=2212)
470#[inline]
471#[target_feature(enable = "avx512vnni")]
472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
473#[cfg_attr(test, assert_instr(vpdpbusds))]
474pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
475    unsafe {
476        let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
477        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
478    }
479}
480
481/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
482///
483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_avx_epi32&expand=2696)
484#[inline]
485#[target_feature(enable = "avxvnni")]
486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
487#[cfg_attr(
488    all(test, any(target_os = "linux", target_env = "msvc")),
489    assert_instr(vpdpbusds)
490)]
491pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
492    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
493}
494
495/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
496///
497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbusds_epi32&expand=2207)
498#[inline]
499#[target_feature(enable = "avx512vnni,avx512vl")]
500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
501#[cfg_attr(test, assert_instr(vpdpbusds))]
502pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
503    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
504}
505
506/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
507///
508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dpbusds_epi32&expand=2208)
509#[inline]
510#[target_feature(enable = "avx512vnni,avx512vl")]
511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
512#[cfg_attr(test, assert_instr(vpdpbusds))]
513pub fn _mm256_mask_dpbusds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
514    unsafe {
515        let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
516        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
517    }
518}
519
520/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
521///
522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dpbusds_epi32&expand=2209)
523#[inline]
524#[target_feature(enable = "avx512vnni,avx512vl")]
525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
526#[cfg_attr(test, assert_instr(vpdpbusds))]
527pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
528    unsafe {
529        let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
530        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
531    }
532}
533
534/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
535///
536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_avx_epi32&expand=2695)
537#[inline]
538#[target_feature(enable = "avxvnni")]
539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
540#[cfg_attr(
541    all(test, any(target_os = "linux", target_env = "msvc")),
542    assert_instr(vpdpbusds)
543)]
544pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
545    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
546}
547
548/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
549///
550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbusds_epi32&expand=2204)
551#[inline]
552#[target_feature(enable = "avx512vnni,avx512vl")]
553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
554#[cfg_attr(test, assert_instr(vpdpbusds))]
555pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
556    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
557}
558
559/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
560///
561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dpbusds_epi32&expand=2205)
562#[inline]
563#[target_feature(enable = "avx512vnni,avx512vl")]
564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
565#[cfg_attr(test, assert_instr(vpdpbusds))]
566pub fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
567    unsafe {
568        let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
569        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
570    }
571}
572
573/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
574///
575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dpbusds_epi32&expand=2206)
576#[inline]
577#[target_feature(enable = "avx512vnni,avx512vl")]
578#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
579#[cfg_attr(test, assert_instr(vpdpbusds))]
580pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
581    unsafe {
582        let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
583        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
584    }
585}
586
587/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
588/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
589/// 32-bit integer in src, and store the packed 32-bit results in dst.
590///
591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssd_epi32&expand=2674)
592#[inline]
593#[target_feature(enable = "avxvnniint8")]
594#[cfg_attr(
595    all(test, any(target_os = "linux", target_env = "msvc")),
596    assert_instr(vpdpbssd)
597)]
598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
599pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
600    unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
601}
602
603/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
604/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
605/// 32-bit integer in src, and store the packed 32-bit results in dst.
606///
607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssd_epi32&expand=2675)
608#[inline]
609#[target_feature(enable = "avxvnniint8")]
610#[cfg_attr(
611    all(test, any(target_os = "linux", target_env = "msvc")),
612    assert_instr(vpdpbssd)
613)]
614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
615pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
616    unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
617}
618
619/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
620/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
621/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
622///
623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbssds_epi32&expand=2676)
624#[inline]
625#[target_feature(enable = "avxvnniint8")]
626#[cfg_attr(
627    all(test, any(target_os = "linux", target_env = "msvc")),
628    assert_instr(vpdpbssds)
629)]
630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
631pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
632    unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
633}
634
635/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
636/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
637/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
638///
639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbssds_epi32&expand=2677)
640#[inline]
641#[target_feature(enable = "avxvnniint8")]
642#[cfg_attr(
643    all(test, any(target_os = "linux", target_env = "msvc")),
644    assert_instr(vpdpbssds)
645)]
646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
647pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
648    unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
649}
650
651/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
652/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
653/// 32-bit integer in src, and store the packed 32-bit results in dst.
654///
655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsud_epi32&expand=2678)
656#[inline]
657#[target_feature(enable = "avxvnniint8")]
658#[cfg_attr(
659    all(test, any(target_os = "linux", target_env = "msvc")),
660    assert_instr(vpdpbsud)
661)]
662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
663pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
664    unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
665}
666
667/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
668/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
669/// 32-bit integer in src, and store the packed 32-bit results in dst.
670///
671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsud_epi32&expand=2679)
672#[inline]
673#[target_feature(enable = "avxvnniint8")]
674#[cfg_attr(
675    all(test, any(target_os = "linux", target_env = "msvc")),
676    assert_instr(vpdpbsud)
677)]
678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
679pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
680    unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
681}
682
683/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
684/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
685/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
686///
687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbsuds_epi32&expand=2680)
688#[inline]
689#[target_feature(enable = "avxvnniint8")]
690#[cfg_attr(
691    all(test, any(target_os = "linux", target_env = "msvc")),
692    assert_instr(vpdpbsuds)
693)]
694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
695pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
696    unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
697}
698
699/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
700/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
701/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
702///
703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbsuds_epi32&expand=2681)
704#[inline]
705#[target_feature(enable = "avxvnniint8")]
706#[cfg_attr(
707    all(test, any(target_os = "linux", target_env = "msvc")),
708    assert_instr(vpdpbsuds)
709)]
710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
711pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
712    unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
713}
714
715/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
716/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
717/// 32-bit integer in src, and store the packed 32-bit results in dst.
718///
719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuud_epi32&expand=2708)
720#[inline]
721#[target_feature(enable = "avxvnniint8")]
722#[cfg_attr(
723    all(test, any(target_os = "linux", target_env = "msvc")),
724    assert_instr(vpdpbuud)
725)]
726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
727pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
728    unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
729}
730
731/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
732/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
733/// 32-bit integer in src, and store the packed 32-bit results in dst.
734///
735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuud_epi32&expand=2709)
736#[inline]
737#[target_feature(enable = "avxvnniint8")]
738#[cfg_attr(
739    all(test, any(target_os = "linux", target_env = "msvc")),
740    assert_instr(vpdpbuud)
741)]
742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
743pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
744    unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
745}
746
747/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
748/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
749/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
750///
751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpbuuds_epi32&expand=2710)
752#[inline]
753#[target_feature(enable = "avxvnniint8")]
754#[cfg_attr(
755    all(test, any(target_os = "linux", target_env = "msvc")),
756    assert_instr(vpdpbuuds)
757)]
758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
759pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
760    unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
761}
762
763/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
764/// integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding
765/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
766///
767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpbuuds_epi32&expand=2711)
768#[inline]
769#[target_feature(enable = "avxvnniint8")]
770#[cfg_attr(
771    all(test, any(target_os = "linux", target_env = "msvc")),
772    assert_instr(vpdpbuuds)
773)]
774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
775pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
776    unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
777}
778
779/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
780/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
781/// 32-bit integer in src, and store the packed 32-bit results in dst.
782///
783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsud_epi32&expand=2738)
784#[inline]
785#[target_feature(enable = "avxvnniint16")]
786#[cfg_attr(
787    all(test, any(target_os = "linux", target_env = "msvc")),
788    assert_instr(vpdpwsud)
789)]
790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
791pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
792    unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
793}
794
795/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
796/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
797/// 32-bit integer in src, and store the packed 32-bit results in dst.
798///
799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsud_epi32&expand=2739)
800#[inline]
801#[target_feature(enable = "avxvnniint16")]
802#[cfg_attr(
803    all(test, any(target_os = "linux", target_env = "msvc")),
804    assert_instr(vpdpwsud)
805)]
806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
807pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
808    unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
809}
810
811/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
812/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
813/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
814///
815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwsuds_epi32&expand=2740)
816#[inline]
817#[target_feature(enable = "avxvnniint16")]
818#[cfg_attr(
819    all(test, any(target_os = "linux", target_env = "msvc")),
820    assert_instr(vpdpwsuds)
821)]
822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
823pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
824    unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
825}
826
827/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
828/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
829/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
830///
831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwsuds_epi32&expand=2741)
832#[inline]
833#[target_feature(enable = "avxvnniint16")]
834#[cfg_attr(
835    all(test, any(target_os = "linux", target_env = "msvc")),
836    assert_instr(vpdpwsuds)
837)]
838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
839pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
840    unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
841}
842
843/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
844/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
845/// 32-bit integer in src, and store the packed 32-bit results in dst.
846///
847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusd_epi32&expand=2742)
848#[inline]
849#[target_feature(enable = "avxvnniint16")]
850#[cfg_attr(
851    all(test, any(target_os = "linux", target_env = "msvc")),
852    assert_instr(vpdpwusd)
853)]
854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
855pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
856    unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
857}
858
859/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
860/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
861/// 32-bit integer in src, and store the packed 32-bit results in dst.
862///
863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusd_epi32&expand=2743)
864#[inline]
865#[target_feature(enable = "avxvnniint16")]
866#[cfg_attr(
867    all(test, any(target_os = "linux", target_env = "msvc")),
868    assert_instr(vpdpwusd)
869)]
870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
871pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
872    unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
873}
874
875/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
876/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
877/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
878///
879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwusds_epi32&expand=2744)
880#[inline]
881#[target_feature(enable = "avxvnniint16")]
882#[cfg_attr(
883    all(test, any(target_os = "linux", target_env = "msvc")),
884    assert_instr(vpdpwusds)
885)]
886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
887pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
888    unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
889}
890
891/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
892/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
893/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
894///
895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwusds_epi32&expand=2745)
896#[inline]
897#[target_feature(enable = "avxvnniint16")]
898#[cfg_attr(
899    all(test, any(target_os = "linux", target_env = "msvc")),
900    assert_instr(vpdpwusds)
901)]
902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
903pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
904    unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
905}
906
907/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
908/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
909/// 32-bit integer in src, and store the packed 32-bit results in dst.
910///
911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuud_epi32&expand=2746)
912#[inline]
913#[target_feature(enable = "avxvnniint16")]
914#[cfg_attr(
915    all(test, any(target_os = "linux", target_env = "msvc")),
916    assert_instr(vpdpwuud)
917)]
918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
919pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
920    unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
921}
922
923/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
924/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
925/// 32-bit integer in src, and store the packed 32-bit results in dst.
926///
927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuud_epi32&expand=2747)
928#[inline]
929#[target_feature(enable = "avxvnniint16")]
930#[cfg_attr(
931    all(test, any(target_os = "linux", target_env = "msvc")),
932    assert_instr(vpdpwuud)
933)]
934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
935pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
936    unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
937}
938
939/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
940/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
941/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
942///
943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dpwuuds_epi32&expand=2748)
944#[inline]
945#[target_feature(enable = "avxvnniint16")]
946#[cfg_attr(
947    all(test, any(target_os = "linux", target_env = "msvc")),
948    assert_instr(vpdpwuuds)
949)]
950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
951pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
952    unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
953}
954
955/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
956/// integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding
957/// 32-bit integer in src with signed saturation, and store the packed 32-bit results in dst.
958///
959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dpwuuds_epi32&expand=2749)
960#[inline]
961#[target_feature(enable = "avxvnniint16")]
962#[cfg_attr(
963    all(test, any(target_os = "linux", target_env = "msvc")),
964    assert_instr(vpdpwuuds)
965)]
966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
967pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
968    unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
969}
970
971#[allow(improper_ctypes)]
972unsafe extern "C" {
973    #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
974    fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
975    #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
976    fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
977    #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
978    fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
979
980    #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
981    fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
982    #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
983    fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
984    #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
985    fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
986
987    #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
988    fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
989    #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
990    fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
991    #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
992    fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
993
994    #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
995    fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
996    #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
997    fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
998    #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
999    fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1000
1001    #[link_name = "llvm.x86.avx2.vpdpbssd.128"]
1002    fn vpdpbssd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1003    #[link_name = "llvm.x86.avx2.vpdpbssd.256"]
1004    fn vpdpbssd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1005
1006    #[link_name = "llvm.x86.avx2.vpdpbssds.128"]
1007    fn vpdpbssds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1008    #[link_name = "llvm.x86.avx2.vpdpbssds.256"]
1009    fn vpdpbssds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1010
1011    #[link_name = "llvm.x86.avx2.vpdpbsud.128"]
1012    fn vpdpbsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1013    #[link_name = "llvm.x86.avx2.vpdpbsud.256"]
1014    fn vpdpbsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1015
1016    #[link_name = "llvm.x86.avx2.vpdpbsuds.128"]
1017    fn vpdpbsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1018    #[link_name = "llvm.x86.avx2.vpdpbsuds.256"]
1019    fn vpdpbsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1020
1021    #[link_name = "llvm.x86.avx2.vpdpbuud.128"]
1022    fn vpdpbuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1023    #[link_name = "llvm.x86.avx2.vpdpbuud.256"]
1024    fn vpdpbuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1025
1026    #[link_name = "llvm.x86.avx2.vpdpbuuds.128"]
1027    fn vpdpbuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1028    #[link_name = "llvm.x86.avx2.vpdpbuuds.256"]
1029    fn vpdpbuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1030
1031    #[link_name = "llvm.x86.avx2.vpdpwsud.128"]
1032    fn vpdpwsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1033    #[link_name = "llvm.x86.avx2.vpdpwsud.256"]
1034    fn vpdpwsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1035
1036    #[link_name = "llvm.x86.avx2.vpdpwsuds.128"]
1037    fn vpdpwsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1038    #[link_name = "llvm.x86.avx2.vpdpwsuds.256"]
1039    fn vpdpwsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1040
1041    #[link_name = "llvm.x86.avx2.vpdpwusd.128"]
1042    fn vpdpwusd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1043    #[link_name = "llvm.x86.avx2.vpdpwusd.256"]
1044    fn vpdpwusd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1045
1046    #[link_name = "llvm.x86.avx2.vpdpwusds.128"]
1047    fn vpdpwusds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1048    #[link_name = "llvm.x86.avx2.vpdpwusds.256"]
1049    fn vpdpwusds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1050
1051    #[link_name = "llvm.x86.avx2.vpdpwuud.128"]
1052    fn vpdpwuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1053    #[link_name = "llvm.x86.avx2.vpdpwuud.256"]
1054    fn vpdpwuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1055
1056    #[link_name = "llvm.x86.avx2.vpdpwuuds.128"]
1057    fn vpdpwuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
1058    #[link_name = "llvm.x86.avx2.vpdpwuuds.256"]
1059    fn vpdpwuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
1060}
1061
#[cfg(test)]
mod tests {
    // Smoke tests for the VNNI dot-product intrinsics.
    //
    // Every test uses the same recipe: the accumulator holds 1 in each 32-bit
    // lane and both multiplicands hold 1 in each sub-lane, so the expected
    // result is the accumulator plus the number of sub-lanes per element
    // (3 for 16-bit sub-lanes, 5 for 8-bit sub-lanes). Masked variants are
    // checked once with no mask bits set and once with every lane selected.

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    /// A 32-bit element whose two 16-bit halves both hold the value 1.
    const WORD_ONES: i32 = 1 << 16 | 1 << 0;

    /// A 32-bit element whose four bytes all hold the value 1.
    const BYTE_ONES: i32 = 1 << 24 | 1 << 16 | 1 << 8 | 1 << 0;

    // ---- dpwssd: 16-bit dot product ----

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        let got = _mm512_dpwssd_epi32(acc, ones, ones);
        assert_eq_m512i(got, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm512_mask_dpwssd_epi32(acc, 0x0000, ones, ones);
        assert_eq_m512i(kept, acc);
        // Full mask: every lane receives the dot product.
        let full = _mm512_mask_dpwssd_epi32(acc, 0xFFFF, ones, ones);
        assert_eq_m512i(full, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm512_maskz_dpwssd_epi32(0x0000, acc, ones, ones);
        assert_eq_m512i(zeroed, _mm512_setzero_si512());
        // Full mask: every lane receives the dot product.
        let full = _mm512_maskz_dpwssd_epi32(0xFFFF, acc, ones, ones);
        assert_eq_m512i(full, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssd_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let got = _mm256_dpwssd_avx_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let got = _mm256_dpwssd_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm256_mask_dpwssd_epi32(acc, 0x00, ones, ones);
        assert_eq_m256i(kept, acc);
        // Full mask: every lane receives the dot product.
        let full = _mm256_mask_dpwssd_epi32(acc, 0xFF, ones, ones);
        assert_eq_m256i(full, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm256_maskz_dpwssd_epi32(0x00, acc, ones, ones);
        assert_eq_m256i(zeroed, _mm256_setzero_si256());
        // Full mask: every lane receives the dot product.
        let full = _mm256_maskz_dpwssd_epi32(0xFF, acc, ones, ones);
        assert_eq_m256i(full, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssd_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let got = _mm_dpwssd_avx_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let got = _mm_dpwssd_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm_mask_dpwssd_epi32(acc, 0x00, ones, ones);
        assert_eq_m128i(kept, acc);
        // Low four mask bits set: one bit per 32-bit lane of the vector.
        let full = _mm_mask_dpwssd_epi32(acc, 0x0F, ones, ones);
        assert_eq_m128i(full, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm_maskz_dpwssd_epi32(0x00, acc, ones, ones);
        assert_eq_m128i(zeroed, _mm_setzero_si128());
        // Low four mask bits set: one bit per 32-bit lane of the vector.
        let full = _mm_maskz_dpwssd_epi32(0x0F, acc, ones, ones);
        assert_eq_m128i(full, _mm_set1_epi32(3));
    }

    // ---- dpwssds: 16-bit dot product, `s`-suffixed variant ----

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpwssds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        let got = _mm512_dpwssds_epi32(acc, ones, ones);
        assert_eq_m512i(got, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpwssds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm512_mask_dpwssds_epi32(acc, 0x0000, ones, ones);
        assert_eq_m512i(kept, acc);
        // Full mask: every lane receives the dot product.
        let full = _mm512_mask_dpwssds_epi32(acc, 0xFFFF, ones, ones);
        assert_eq_m512i(full, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpwssds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(WORD_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm512_maskz_dpwssds_epi32(0x0000, acc, ones, ones);
        assert_eq_m512i(zeroed, _mm512_setzero_si512());
        // Full mask: every lane receives the dot product.
        let full = _mm512_maskz_dpwssds_epi32(0xFFFF, acc, ones, ones);
        assert_eq_m512i(full, _mm512_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpwssds_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let got = _mm256_dpwssds_avx_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpwssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let got = _mm256_dpwssds_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpwssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm256_mask_dpwssds_epi32(acc, 0x00, ones, ones);
        assert_eq_m256i(kept, acc);
        // Full mask: every lane receives the dot product.
        let full = _mm256_mask_dpwssds_epi32(acc, 0xFF, ones, ones);
        assert_eq_m256i(full, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpwssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm256_maskz_dpwssds_epi32(0x00, acc, ones, ones);
        assert_eq_m256i(zeroed, _mm256_setzero_si256());
        // Full mask: every lane receives the dot product.
        let full = _mm256_maskz_dpwssds_epi32(0xFF, acc, ones, ones);
        assert_eq_m256i(full, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpwssds_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let got = _mm_dpwssds_avx_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpwssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let got = _mm_dpwssds_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpwssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm_mask_dpwssds_epi32(acc, 0x00, ones, ones);
        assert_eq_m128i(kept, acc);
        // Low four mask bits set: one bit per 32-bit lane of the vector.
        let full = _mm_mask_dpwssds_epi32(acc, 0x0F, ones, ones);
        assert_eq_m128i(full, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpwssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm_maskz_dpwssds_epi32(0x00, acc, ones, ones);
        assert_eq_m128i(zeroed, _mm_setzero_si128());
        // Low four mask bits set: one bit per 32-bit lane of the vector.
        let full = _mm_maskz_dpwssds_epi32(0x0F, acc, ones, ones);
        assert_eq_m128i(full, _mm_set1_epi32(3));
    }

    // ---- dpbusd: 8-bit dot product ----

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        let got = _mm512_dpbusd_epi32(acc, ones, ones);
        assert_eq_m512i(got, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm512_mask_dpbusd_epi32(acc, 0x0000, ones, ones);
        assert_eq_m512i(kept, acc);
        // Full mask: every lane receives the dot product.
        let full = _mm512_mask_dpbusd_epi32(acc, 0xFFFF, ones, ones);
        assert_eq_m512i(full, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusd_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm512_maskz_dpbusd_epi32(0x0000, acc, ones, ones);
        assert_eq_m512i(zeroed, _mm512_setzero_si512());
        // Full mask: every lane receives the dot product.
        let full = _mm512_maskz_dpbusd_epi32(0xFFFF, acc, ones, ones);
        assert_eq_m512i(full, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusd_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let got = _mm256_dpbusd_avx_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let got = _mm256_dpbusd_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm256_mask_dpbusd_epi32(acc, 0x00, ones, ones);
        assert_eq_m256i(kept, acc);
        // Full mask: every lane receives the dot product.
        let full = _mm256_mask_dpbusd_epi32(acc, 0xFF, ones, ones);
        assert_eq_m256i(full, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm256_maskz_dpbusd_epi32(0x00, acc, ones, ones);
        assert_eq_m256i(zeroed, _mm256_setzero_si256());
        // Full mask: every lane receives the dot product.
        let full = _mm256_maskz_dpbusd_epi32(0xFF, acc, ones, ones);
        assert_eq_m256i(full, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusd_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let got = _mm_dpbusd_avx_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let got = _mm_dpbusd_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm_mask_dpbusd_epi32(acc, 0x00, ones, ones);
        assert_eq_m128i(kept, acc);
        // Low four mask bits set: one bit per 32-bit lane of the vector.
        let full = _mm_mask_dpbusd_epi32(acc, 0x0F, ones, ones);
        assert_eq_m128i(full, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm_maskz_dpbusd_epi32(0x00, acc, ones, ones);
        assert_eq_m128i(zeroed, _mm_setzero_si128());
        // Low four mask bits set: one bit per 32-bit lane of the vector.
        let full = _mm_maskz_dpbusd_epi32(0x0F, acc, ones, ones);
        assert_eq_m128i(full, _mm_set1_epi32(5));
    }

    // ---- dpbusds: 8-bit dot product, `s`-suffixed variant ----

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_dpbusds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        let got = _mm512_dpbusds_epi32(acc, ones, ones);
        assert_eq_m512i(got, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_mask_dpbusds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm512_mask_dpbusds_epi32(acc, 0x0000, ones, ones);
        assert_eq_m512i(kept, acc);
        // Full mask: every lane receives the dot product.
        let full = _mm512_mask_dpbusds_epi32(acc, 0xFFFF, ones, ones);
        assert_eq_m512i(full, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni")]
    unsafe fn test_mm512_maskz_dpbusds_epi32() {
        let acc = _mm512_set1_epi32(1);
        let ones = _mm512_set1_epi32(BYTE_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm512_maskz_dpbusds_epi32(0x0000, acc, ones, ones);
        assert_eq_m512i(zeroed, _mm512_setzero_si512());
        // Full mask: every lane receives the dot product.
        let full = _mm512_maskz_dpbusds_epi32(0xFFFF, acc, ones, ones);
        assert_eq_m512i(full, _mm512_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm256_dpbusds_avx_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let got = _mm256_dpbusds_avx_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_dpbusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let got = _mm256_dpbusds_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_mask_dpbusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm256_mask_dpbusds_epi32(acc, 0x00, ones, ones);
        assert_eq_m256i(kept, acc);
        // Full mask: every lane receives the dot product.
        let full = _mm256_mask_dpbusds_epi32(acc, 0xFF, ones, ones);
        assert_eq_m256i(full, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm256_maskz_dpbusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm256_maskz_dpbusds_epi32(0x00, acc, ones, ones);
        assert_eq_m256i(zeroed, _mm256_setzero_si256());
        // Full mask: every lane receives the dot product.
        let full = _mm256_maskz_dpbusds_epi32(0xFF, acc, ones, ones);
        assert_eq_m256i(full, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnni")]
    unsafe fn test_mm_dpbusds_avx_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let got = _mm_dpbusds_avx_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_dpbusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let got = _mm_dpbusds_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_mask_dpbusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        // Empty mask: the result must equal the accumulator.
        let kept = _mm_mask_dpbusds_epi32(acc, 0x00, ones, ones);
        assert_eq_m128i(kept, acc);
        // Low four mask bits set: one bit per 32-bit lane of the vector.
        let full = _mm_mask_dpbusds_epi32(acc, 0x0F, ones, ones);
        assert_eq_m128i(full, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avx512vnni,avx512vl")]
    unsafe fn test_mm_maskz_dpbusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        // Empty mask: the result must be all zeros.
        let zeroed = _mm_maskz_dpbusds_epi32(0x00, acc, ones, ones);
        assert_eq_m128i(zeroed, _mm_setzero_si128());
        // Low four mask bits set: one bit per 32-bit lane of the vector.
        let full = _mm_maskz_dpbusds_epi32(0x0F, acc, ones, ones);
        assert_eq_m128i(full, _mm_set1_epi32(5));
    }

    // ---- avxvnniint8: 8-bit dot products ----

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let got = _mm_dpbssd_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let got = _mm256_dpbssd_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbssds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let got = _mm_dpbssds_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbssds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let got = _mm256_dpbssds_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsud_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let got = _mm_dpbsud_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let got = _mm256_dpbsud_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbsuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let got = _mm_dpbsuds_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbsuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let got = _mm256_dpbsuds_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuud_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let got = _mm_dpbuud_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let got = _mm256_dpbuud_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm_dpbuuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(BYTE_ONES);
        let got = _mm_dpbuuds_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "avxvnniint8")]
    unsafe fn test_mm256_dpbuuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(BYTE_ONES);
        let got = _mm256_dpbuuds_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(5));
    }

    // ---- avxvnniint16: 16-bit dot products ----

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsud_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let got = _mm_dpwsud_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let got = _mm256_dpwsud_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwsuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let got = _mm_dpwsuds_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwsuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let got = _mm256_dpwsuds_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusd_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let got = _mm_dpwusd_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusd_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let got = _mm256_dpwusd_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwusds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let got = _mm_dpwusds_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwusds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let got = _mm256_dpwusds_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuud_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let got = _mm_dpwuud_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuud_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let got = _mm256_dpwuud_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm_dpwuuds_epi32() {
        let acc = _mm_set1_epi32(1);
        let ones = _mm_set1_epi32(WORD_ONES);
        let got = _mm_dpwuuds_epi32(acc, ones, ones);
        assert_eq_m128i(got, _mm_set1_epi32(3));
    }

    #[simd_test(enable = "avxvnniint16")]
    unsafe fn test_mm256_dpwuuds_epi32() {
        let acc = _mm256_set1_epi32(1);
        let ones = _mm256_set1_epi32(WORD_ONES);
        let got = _mm256_dpwuuds_epi32(acc, ones, ones);
        assert_eq_m256i(got, _mm256_set1_epi32(3));
    }
}