Skip to content

Commit 5db14d0

Browse files
authored
Update relaxed simd instructions (bytecodealliance#766)
* Rename f32x4.fnma and f64x2.fnma instructions
* Add relaxed_ prefix to all relaxed instructions (based on WebAssembly/relaxed-simd#42 (comment))
* Renumber relaxed simd instructions
* Implement i16x8.relaxed_q15mulr_s
* Implement relaxed dot instructions
1 parent 3755661 commit 5db14d0

9 files changed

Lines changed: 220 additions & 127 deletions

File tree

crates/wasm-encoder/src/core/code.rs

Lines changed: 53 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -766,18 +766,22 @@ pub enum Instruction<'a> {
766766
I32x4RelaxedTruncSatF32x4U,
767767
I32x4RelaxedTruncSatF64x2SZero,
768768
I32x4RelaxedTruncSatF64x2UZero,
769-
F32x4Fma,
770-
F32x4Fms,
771-
F64x2Fma,
772-
F64x2Fms,
773-
I8x16LaneSelect,
774-
I16x8LaneSelect,
775-
I32x4LaneSelect,
776-
I64x2LaneSelect,
769+
F32x4RelaxedFma,
770+
F32x4RelaxedFnma,
771+
F64x2RelaxedFma,
772+
F64x2RelaxedFnma,
773+
I8x16RelaxedLaneselect,
774+
I16x8RelaxedLaneselect,
775+
I32x4RelaxedLaneselect,
776+
I64x2RelaxedLaneselect,
777777
F32x4RelaxedMin,
778778
F32x4RelaxedMax,
779779
F64x2RelaxedMin,
780780
F64x2RelaxedMax,
781+
I16x8RelaxedQ15mulrS,
782+
I16x8DotI8x16I7x16S,
783+
I32x4DotI8x16I7x16AddS,
784+
F32x4RelaxedDotBf16x8AddF32x4,
781785

782786
// Atomic instructions (the threads proposal)
783787
MemoryAtomicNotify { memarg: MemArg },
@@ -2321,71 +2325,87 @@ impl Encode for Instruction<'_> {
23212325
}
23222326
Instruction::I8x16RelaxedSwizzle => {
23232327
sink.push(0xFD);
2324-
0xA2u32.encode(sink);
2328+
0x100u32.encode(sink);
23252329
}
23262330
Instruction::I32x4RelaxedTruncSatF32x4S => {
23272331
sink.push(0xFD);
2328-
0xA5u32.encode(sink);
2332+
0x101u32.encode(sink);
23292333
}
23302334
Instruction::I32x4RelaxedTruncSatF32x4U => {
23312335
sink.push(0xFD);
2332-
0xA6u32.encode(sink);
2336+
0x102u32.encode(sink);
23332337
}
23342338
Instruction::I32x4RelaxedTruncSatF64x2SZero => {
23352339
sink.push(0xFD);
2336-
0xC5u32.encode(sink);
2340+
0x103u32.encode(sink);
23372341
}
23382342
Instruction::I32x4RelaxedTruncSatF64x2UZero => {
23392343
sink.push(0xFD);
2340-
0xC6u32.encode(sink);
2344+
0x104u32.encode(sink);
23412345
}
2342-
Instruction::F32x4Fma => {
2346+
Instruction::F32x4RelaxedFma => {
23432347
sink.push(0xFD);
2344-
0xAFu32.encode(sink);
2348+
0x105u32.encode(sink);
23452349
}
2346-
Instruction::F32x4Fms => {
2350+
Instruction::F32x4RelaxedFnma => {
23472351
sink.push(0xFD);
2348-
0xB0u32.encode(sink);
2352+
0x106u32.encode(sink);
23492353
}
2350-
Instruction::F64x2Fma => {
2354+
Instruction::F64x2RelaxedFma => {
23512355
sink.push(0xFD);
2352-
0xCFu32.encode(sink);
2356+
0x107u32.encode(sink);
23532357
}
2354-
Instruction::F64x2Fms => {
2358+
Instruction::F64x2RelaxedFnma => {
23552359
sink.push(0xFD);
2356-
0xD0u32.encode(sink);
2360+
0x108u32.encode(sink);
23572361
}
2358-
Instruction::I8x16LaneSelect => {
2362+
Instruction::I8x16RelaxedLaneselect => {
23592363
sink.push(0xFD);
2360-
0xB2u32.encode(sink);
2364+
0x109u32.encode(sink);
23612365
}
2362-
Instruction::I16x8LaneSelect => {
2366+
Instruction::I16x8RelaxedLaneselect => {
23632367
sink.push(0xFD);
2364-
0xB3u32.encode(sink);
2368+
0x10Au32.encode(sink);
23652369
}
2366-
Instruction::I32x4LaneSelect => {
2370+
Instruction::I32x4RelaxedLaneselect => {
23672371
sink.push(0xFD);
2368-
0xD2u32.encode(sink);
2372+
0x10Bu32.encode(sink);
23692373
}
2370-
Instruction::I64x2LaneSelect => {
2374+
Instruction::I64x2RelaxedLaneselect => {
23712375
sink.push(0xFD);
2372-
0xD3u32.encode(sink);
2376+
0x10Cu32.encode(sink);
23732377
}
23742378
Instruction::F32x4RelaxedMin => {
23752379
sink.push(0xFD);
2376-
0xB4u32.encode(sink);
2380+
0x10Du32.encode(sink);
23772381
}
23782382
Instruction::F32x4RelaxedMax => {
23792383
sink.push(0xFD);
2380-
0xE2u32.encode(sink);
2384+
0x10Eu32.encode(sink);
23812385
}
23822386
Instruction::F64x2RelaxedMin => {
23832387
sink.push(0xFD);
2384-
0xD4u32.encode(sink);
2388+
0x10Fu32.encode(sink);
23852389
}
23862390
Instruction::F64x2RelaxedMax => {
23872391
sink.push(0xFD);
2388-
0xEEu32.encode(sink);
2392+
0x110u32.encode(sink);
2393+
}
2394+
Instruction::I16x8RelaxedQ15mulrS => {
2395+
sink.push(0xFD);
2396+
0x111u32.encode(sink);
2397+
}
2398+
Instruction::I16x8DotI8x16I7x16S => {
2399+
sink.push(0xFD);
2400+
0x112u32.encode(sink);
2401+
}
2402+
Instruction::I32x4DotI8x16I7x16AddS => {
2403+
sink.push(0xFD);
2404+
0x113u32.encode(sink);
2405+
}
2406+
Instruction::F32x4RelaxedDotBf16x8AddF32x4 => {
2407+
sink.push(0xFD);
2408+
0x114u32.encode(sink);
23892409
}
23902410

23912411
// Atomic instructions from the thread proposal

crates/wasm-mutate/src/mutators/translate.rs

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -839,18 +839,22 @@ pub fn op(t: &mut dyn Translator, op: &Operator<'_>) -> Result<Instruction<'stat
839839
O::I32x4RelaxedTruncSatF32x4U => I::I32x4RelaxedTruncSatF32x4U,
840840
O::I32x4RelaxedTruncSatF64x2SZero => I::I32x4RelaxedTruncSatF64x2SZero,
841841
O::I32x4RelaxedTruncSatF64x2UZero => I::I32x4RelaxedTruncSatF64x2UZero,
842-
O::F32x4Fma => I::F32x4Fma,
843-
O::F32x4Fms => I::F32x4Fms,
844-
O::F64x2Fma => I::F64x2Fma,
845-
O::F64x2Fms => I::F64x2Fms,
846-
O::I8x16LaneSelect => I::I8x16LaneSelect,
847-
O::I16x8LaneSelect => I::I16x8LaneSelect,
848-
O::I32x4LaneSelect => I::I32x4LaneSelect,
849-
O::I64x2LaneSelect => I::I64x2LaneSelect,
842+
O::F32x4RelaxedFma => I::F32x4RelaxedFma,
843+
O::F32x4RelaxedFnma => I::F32x4RelaxedFnma,
844+
O::F64x2RelaxedFma => I::F64x2RelaxedFma,
845+
O::F64x2RelaxedFnma => I::F64x2RelaxedFnma,
846+
O::I8x16RelaxedLaneselect => I::I8x16RelaxedLaneselect,
847+
O::I16x8RelaxedLaneselect => I::I16x8RelaxedLaneselect,
848+
O::I32x4RelaxedLaneselect => I::I32x4RelaxedLaneselect,
849+
O::I64x2RelaxedLaneselect => I::I64x2RelaxedLaneselect,
850850
O::F32x4RelaxedMin => I::F32x4RelaxedMin,
851851
O::F32x4RelaxedMax => I::F32x4RelaxedMax,
852852
O::F64x2RelaxedMin => I::F64x2RelaxedMin,
853853
O::F64x2RelaxedMax => I::F64x2RelaxedMax,
854+
O::I16x8RelaxedQ15mulrS => I::I16x8RelaxedQ15mulrS,
855+
O::I16x8DotI8x16I7x16S => I::I16x8DotI8x16I7x16S,
856+
O::I32x4DotI8x16I7x16AddS => I::I32x4DotI8x16I7x16AddS,
857+
O::F32x4RelaxedDotBf16x8AddF32x4 => I::F32x4RelaxedDotBf16x8AddF32x4,
854858

855859
// Note that these cases are not supported in `wasm_encoder` yet,
856860
// and in general `wasmparser` often parses more things than

crates/wasm-smith/src/core/code_builder.rs

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -545,13 +545,17 @@ instructions! {
545545
(Some(simd_v128_on_stack_relaxed), i32x4_relaxed_trunc_sat_f64x2s_zero, Vector),
546546
(Some(simd_v128_on_stack_relaxed), i32x4_relaxed_trunc_sat_f64x2u_zero, Vector),
547547
(Some(simd_v128_v128_v128_on_stack_relaxed), f32x4_fma, Vector),
548-
(Some(simd_v128_v128_v128_on_stack_relaxed), f32x4_fms, Vector),
548+
(Some(simd_v128_v128_v128_on_stack_relaxed), f32x4_fnma, Vector),
549549
(Some(simd_v128_v128_v128_on_stack_relaxed), f64x2_fma, Vector),
550-
(Some(simd_v128_v128_v128_on_stack_relaxed), f64x2_fms, Vector),
550+
(Some(simd_v128_v128_v128_on_stack_relaxed), f64x2_fnma, Vector),
551551
(Some(simd_v128_v128_on_stack_relaxed), f32x4_relaxed_min, Vector),
552552
(Some(simd_v128_v128_on_stack_relaxed), f32x4_relaxed_max, Vector),
553553
(Some(simd_v128_v128_on_stack_relaxed), f64x2_relaxed_min, Vector),
554554
(Some(simd_v128_v128_on_stack_relaxed), f64x2_relaxed_max, Vector),
555+
(Some(simd_v128_v128_on_stack_relaxed), i16x8_relaxed_q15mulr_s, Vector),
556+
(Some(simd_v128_v128_on_stack_relaxed), i16x8_dot_i8x16_i7x16_s, Vector),
557+
(Some(simd_v128_v128_v128_on_stack_relaxed), i32x4_dot_i8x16_i7x16_add_s, Vector),
558+
(Some(simd_v128_v128_v128_on_stack_relaxed), f32x4_relaxed_dot_bf16x8_add_f32x4, Vector),
555559
}
556560

557561
pub(crate) struct CodeBuilderAllocations {
@@ -3956,15 +3960,22 @@ simd_unop!(
39563960
I32x4RelaxedTruncSatF64x2UZero,
39573961
i32x4_relaxed_trunc_sat_f64x2u_zero
39583962
);
3959-
simd_ternop!(F32x4Fma, f32x4_fma);
3960-
simd_ternop!(F32x4Fms, f32x4_fms);
3961-
simd_ternop!(F64x2Fma, f64x2_fma);
3962-
simd_ternop!(F64x2Fms, f64x2_fms);
3963-
simd_ternop!(I8x16LaneSelect, i8x16_laneselect);
3964-
simd_ternop!(I16x8LaneSelect, i16x8_laneselect);
3965-
simd_ternop!(I32x4LaneSelect, i32x4_laneselect);
3966-
simd_ternop!(I64x2LaneSelect, i64x2_laneselect);
3963+
simd_ternop!(F32x4RelaxedFma, f32x4_fma);
3964+
simd_ternop!(F32x4RelaxedFnma, f32x4_fnma);
3965+
simd_ternop!(F64x2RelaxedFma, f64x2_fma);
3966+
simd_ternop!(F64x2RelaxedFnma, f64x2_fnma);
3967+
simd_ternop!(I8x16RelaxedLaneselect, i8x16_laneselect);
3968+
simd_ternop!(I16x8RelaxedLaneselect, i16x8_laneselect);
3969+
simd_ternop!(I32x4RelaxedLaneselect, i32x4_laneselect);
3970+
simd_ternop!(I64x2RelaxedLaneselect, i64x2_laneselect);
39673971
simd_binop!(F32x4RelaxedMin, f32x4_relaxed_min);
39683972
simd_binop!(F32x4RelaxedMax, f32x4_relaxed_max);
39693973
simd_binop!(F64x2RelaxedMin, f64x2_relaxed_min);
39703974
simd_binop!(F64x2RelaxedMax, f64x2_relaxed_max);
3975+
simd_binop!(I16x8RelaxedQ15mulrS, i16x8_relaxed_q15mulr_s);
3976+
simd_binop!(I16x8DotI8x16I7x16S, i16x8_dot_i8x16_i7x16_s);
3977+
simd_ternop!(I32x4DotI8x16I7x16AddS, i32x4_dot_i8x16_i7x16_add_s);
3978+
simd_ternop!(
3979+
F32x4RelaxedDotBf16x8AddF32x4,
3980+
f32x4_relaxed_dot_bf16x8_add_f32x4
3981+
);

crates/wasmparser/src/binary_reader.rs

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1889,12 +1889,9 @@ impl<'a> BinaryReader<'a> {
18891889
0x9e => visitor.visit_i16x8_extmul_low_i8x16_u(pos),
18901890
0x9f => visitor.visit_i16x8_extmul_high_i8x16_u(pos),
18911891
0xa0 => visitor.visit_i32x4_abs(pos),
1892-
0xa2 => visitor.visit_i8x16_relaxed_swizzle(pos),
18931892
0xa1 => visitor.visit_i32x4_neg(pos),
18941893
0xa3 => visitor.visit_i32x4_all_true(pos),
18951894
0xa4 => visitor.visit_i32x4_bitmask(pos),
1896-
0xa5 => visitor.visit_i32x4_relaxed_trunc_sat_f32x4_s(pos),
1897-
0xa6 => visitor.visit_i32x4_relaxed_trunc_sat_f32x4_u(pos),
18981895
0xa7 => visitor.visit_i32x4_extend_low_i16x8_s(pos),
18991896
0xa8 => visitor.visit_i32x4_extend_high_i16x8_s(pos),
19001897
0xa9 => visitor.visit_i32x4_extend_low_i16x8_u(pos),
@@ -1903,12 +1900,7 @@ impl<'a> BinaryReader<'a> {
19031900
0xac => visitor.visit_i32x4_shr_s(pos),
19041901
0xad => visitor.visit_i32x4_shr_u(pos),
19051902
0xae => visitor.visit_i32x4_add(pos),
1906-
0xaf => visitor.visit_f32x4_fma(pos),
1907-
0xb0 => visitor.visit_f32x4_fms(pos),
19081903
0xb1 => visitor.visit_i32x4_sub(pos),
1909-
0xb2 => visitor.visit_i8x16_laneselect(pos),
1910-
0xb3 => visitor.visit_i16x8_laneselect(pos),
1911-
0xb4 => visitor.visit_f32x4_relaxed_min(pos),
19121904
0xb5 => visitor.visit_i32x4_mul(pos),
19131905
0xb6 => visitor.visit_i32x4_min_s(pos),
19141906
0xb7 => visitor.visit_i32x4_min_u(pos),
@@ -1923,8 +1915,6 @@ impl<'a> BinaryReader<'a> {
19231915
0xc1 => visitor.visit_i64x2_neg(pos),
19241916
0xc3 => visitor.visit_i64x2_all_true(pos),
19251917
0xc4 => visitor.visit_i64x2_bitmask(pos),
1926-
0xc5 => visitor.visit_i32x4_relaxed_trunc_sat_f64x2_s_zero(pos),
1927-
0xc6 => visitor.visit_i32x4_relaxed_trunc_sat_f64x2_u_zero(pos),
19281918
0xc7 => visitor.visit_i64x2_extend_low_i32x4_s(pos),
19291919
0xc8 => visitor.visit_i64x2_extend_high_i32x4_s(pos),
19301920
0xc9 => visitor.visit_i64x2_extend_low_i32x4_u(pos),
@@ -1933,12 +1923,7 @@ impl<'a> BinaryReader<'a> {
19331923
0xcc => visitor.visit_i64x2_shr_s(pos),
19341924
0xcd => visitor.visit_i64x2_shr_u(pos),
19351925
0xce => visitor.visit_i64x2_add(pos),
1936-
0xcf => visitor.visit_f64x2_fma(pos),
1937-
0xd0 => visitor.visit_f64x2_fms(pos),
19381926
0xd1 => visitor.visit_i64x2_sub(pos),
1939-
0xd2 => visitor.visit_i32x4_laneselect(pos),
1940-
0xd3 => visitor.visit_i64x2_laneselect(pos),
1941-
0xd4 => visitor.visit_f64x2_relaxed_min(pos),
19421927
0xd5 => visitor.visit_i64x2_mul(pos),
19431928
0xd6 => visitor.visit_i64x2_eq(pos),
19441929
0xd7 => visitor.visit_i64x2_ne(pos),
@@ -1952,7 +1937,6 @@ impl<'a> BinaryReader<'a> {
19521937
0xdf => visitor.visit_i64x2_extmul_high_i32x4_u(pos),
19531938
0xe0 => visitor.visit_f32x4_abs(pos),
19541939
0xe1 => visitor.visit_f32x4_neg(pos),
1955-
0xe2 => visitor.visit_f32x4_relaxed_max(pos),
19561940
0xe3 => visitor.visit_f32x4_sqrt(pos),
19571941
0xe4 => visitor.visit_f32x4_add(pos),
19581942
0xe5 => visitor.visit_f32x4_sub(pos),
@@ -1964,7 +1948,6 @@ impl<'a> BinaryReader<'a> {
19641948
0xeb => visitor.visit_f32x4_pmax(pos),
19651949
0xec => visitor.visit_f64x2_abs(pos),
19661950
0xed => visitor.visit_f64x2_neg(pos),
1967-
0xee => visitor.visit_f64x2_relaxed_max(pos),
19681951
0xef => visitor.visit_f64x2_sqrt(pos),
19691952
0xf0 => visitor.visit_f64x2_add(pos),
19701953
0xf1 => visitor.visit_f64x2_sub(pos),
@@ -1982,6 +1965,27 @@ impl<'a> BinaryReader<'a> {
19821965
0xfd => visitor.visit_i32x4_trunc_sat_f64x2_u_zero(pos),
19831966
0xfe => visitor.visit_f64x2_convert_low_i32x4_s(pos),
19841967
0xff => visitor.visit_f64x2_convert_low_i32x4_u(pos),
1968+
0x100 => visitor.visit_i8x16_relaxed_swizzle(pos),
1969+
0x101 => visitor.visit_i32x4_relaxed_trunc_sat_f32x4_s(pos),
1970+
0x102 => visitor.visit_i32x4_relaxed_trunc_sat_f32x4_u(pos),
1971+
0x103 => visitor.visit_i32x4_relaxed_trunc_sat_f64x2_s_zero(pos),
1972+
0x104 => visitor.visit_i32x4_relaxed_trunc_sat_f64x2_u_zero(pos),
1973+
0x105 => visitor.visit_f32x4_relaxed_fma(pos),
1974+
0x106 => visitor.visit_f32x4_relaxed_fnma(pos),
1975+
0x107 => visitor.visit_f64x2_relaxed_fma(pos),
1976+
0x108 => visitor.visit_f64x2_relaxed_fnma(pos),
1977+
0x109 => visitor.visit_i8x16_relaxed_laneselect(pos),
1978+
0x10a => visitor.visit_i16x8_relaxed_laneselect(pos),
1979+
0x10b => visitor.visit_i32x4_relaxed_laneselect(pos),
1980+
0x10c => visitor.visit_i64x2_relaxed_laneselect(pos),
1981+
0x10d => visitor.visit_f32x4_relaxed_min(pos),
1982+
0x10e => visitor.visit_f32x4_relaxed_max(pos),
1983+
0x10f => visitor.visit_f64x2_relaxed_min(pos),
1984+
0x110 => visitor.visit_f64x2_relaxed_max(pos),
1985+
0x111 => visitor.visit_i16x8_relaxed_q15mulr_s(pos),
1986+
0x112 => visitor.visit_i16x8_dot_i8x16_i7x16_s(pos),
1987+
0x113 => visitor.visit_i32x4_dot_i8x16_i7x16_add_s(pos),
1988+
0x114 => visitor.visit_f32x4_relaxed_dot_bf16x8_add_f32x4(pos),
19851989

19861990
_ => bail!(pos, "unknown 0xfd subopcode: 0x{code:x}"),
19871991
})

crates/wasmparser/src/lib.rs

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -647,18 +647,22 @@ macro_rules! for_each_operator {
647647
@relaxed_simd I32x4RelaxedTruncSatF32x4U => visit_i32x4_relaxed_trunc_sat_f32x4_u
648648
@relaxed_simd I32x4RelaxedTruncSatF64x2SZero => visit_i32x4_relaxed_trunc_sat_f64x2_s_zero
649649
@relaxed_simd I32x4RelaxedTruncSatF64x2UZero => visit_i32x4_relaxed_trunc_sat_f64x2_u_zero
650-
@relaxed_simd F32x4Fma => visit_f32x4_fma
651-
@relaxed_simd F32x4Fms => visit_f32x4_fms
652-
@relaxed_simd F64x2Fma => visit_f64x2_fma
653-
@relaxed_simd F64x2Fms => visit_f64x2_fms
654-
@relaxed_simd I8x16LaneSelect => visit_i8x16_laneselect
655-
@relaxed_simd I16x8LaneSelect => visit_i16x8_laneselect
656-
@relaxed_simd I32x4LaneSelect => visit_i32x4_laneselect
657-
@relaxed_simd I64x2LaneSelect => visit_i64x2_laneselect
650+
@relaxed_simd F32x4RelaxedFma => visit_f32x4_relaxed_fma
651+
@relaxed_simd F32x4RelaxedFnma => visit_f32x4_relaxed_fnma
652+
@relaxed_simd F64x2RelaxedFma => visit_f64x2_relaxed_fma
653+
@relaxed_simd F64x2RelaxedFnma => visit_f64x2_relaxed_fnma
654+
@relaxed_simd I8x16RelaxedLaneselect => visit_i8x16_relaxed_laneselect
655+
@relaxed_simd I16x8RelaxedLaneselect => visit_i16x8_relaxed_laneselect
656+
@relaxed_simd I32x4RelaxedLaneselect => visit_i32x4_relaxed_laneselect
657+
@relaxed_simd I64x2RelaxedLaneselect => visit_i64x2_relaxed_laneselect
658658
@relaxed_simd F32x4RelaxedMin => visit_f32x4_relaxed_min
659659
@relaxed_simd F32x4RelaxedMax => visit_f32x4_relaxed_max
660660
@relaxed_simd F64x2RelaxedMin => visit_f64x2_relaxed_min
661661
@relaxed_simd F64x2RelaxedMax => visit_f64x2_relaxed_max
662+
@relaxed_simd I16x8RelaxedQ15mulrS => visit_i16x8_relaxed_q15mulr_s
663+
@relaxed_simd I16x8DotI8x16I7x16S => visit_i16x8_dot_i8x16_i7x16_s
664+
@relaxed_simd I32x4DotI8x16I7x16AddS => visit_i32x4_dot_i8x16_i7x16_add_s
665+
@relaxed_simd F32x4RelaxedDotBf16x8AddF32x4 => visit_f32x4_relaxed_dot_bf16x8_add_f32x4
662666
}
663667
};
664668
}

0 commit comments

Comments
 (0)