Skip to content

Commit baa0ae3

Browse files
committed
simd/archsimd: correct type and instruction for SaturateToUint8
It should be defined on unsigned types, not signed types, and use unsigned conversion instructions. Change-Id: I49694ccdf1d331cfde88591531c358d9886e83e6 Reviewed-on: https://go-review.googlesource.com/c/go/+/731500 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com>
1 parent d46c58d commit baa0ae3

9 files changed

Lines changed: 758 additions & 82 deletions

File tree

src/cmd/compile/internal/amd64/simdssa.go

Lines changed: 32 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/_gen/simdAMD64.rules

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -919,15 +919,15 @@
919919
(SaturateToInt32Int64x2 ...) => (VPMOVSQD128_128 ...)
920920
(SaturateToInt32Int64x4 ...) => (VPMOVSQD128_256 ...)
921921
(SaturateToInt32Int64x8 ...) => (VPMOVSQD256 ...)
922-
(SaturateToUint8Int16x8 ...) => (VPMOVSWB128_128 ...)
923-
(SaturateToUint8Int16x16 ...) => (VPMOVSWB128_256 ...)
924-
(SaturateToUint8Int32x4 ...) => (VPMOVSDB128_128 ...)
925-
(SaturateToUint8Int32x8 ...) => (VPMOVSDB128_256 ...)
926-
(SaturateToUint8Int32x16 ...) => (VPMOVSDB128_512 ...)
927-
(SaturateToUint8Int64x2 ...) => (VPMOVSQB128_128 ...)
928-
(SaturateToUint8Int64x4 ...) => (VPMOVSQB128_256 ...)
929-
(SaturateToUint8Int64x8 ...) => (VPMOVSQB128_512 ...)
922+
(SaturateToUint8Uint16x8 ...) => (VPMOVUSWB128_128 ...)
923+
(SaturateToUint8Uint16x16 ...) => (VPMOVUSWB128_256 ...)
930924
(SaturateToUint8Uint16x32 ...) => (VPMOVUSWB256 ...)
925+
(SaturateToUint8Uint32x4 ...) => (VPMOVUSDB128_128 ...)
926+
(SaturateToUint8Uint32x8 ...) => (VPMOVUSDB128_256 ...)
927+
(SaturateToUint8Uint32x16 ...) => (VPMOVUSDB128_512 ...)
928+
(SaturateToUint8Uint64x2 ...) => (VPMOVUSQB128_128 ...)
929+
(SaturateToUint8Uint64x4 ...) => (VPMOVUSQB128_256 ...)
930+
(SaturateToUint8Uint64x8 ...) => (VPMOVUSQB128_512 ...)
931931
(SaturateToUint16Uint32x4 ...) => (VPMOVUSDW128_128 ...)
932932
(SaturateToUint16Uint32x8 ...) => (VPMOVUSDW128_256 ...)
933933
(SaturateToUint16Uint32x16 ...) => (VPMOVUSDW256 ...)
@@ -1787,7 +1787,15 @@
17871787
(VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) => (VPMOVSQDMasked128_128 x mask)
17881788
(VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) => (VPMOVSQDMasked128_256 x mask)
17891789
(VMOVDQU64Masked256 (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256 x mask)
1790+
(VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) => (VPMOVUSWBMasked128_128 x mask)
1791+
(VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) => (VPMOVUSWBMasked128_256 x mask)
17901792
(VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256 x mask)
1793+
(VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) => (VPMOVUSDBMasked128_128 x mask)
1794+
(VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) => (VPMOVUSDBMasked128_256 x mask)
1795+
(VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512 x mask)
1796+
(VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) => (VPMOVUSQBMasked128_128 x mask)
1797+
(VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) => (VPMOVUSQBMasked128_256 x mask)
1798+
(VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512 x mask)
17911799
(VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask)
17921800
(VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) => (VPACKUSDWMasked256 x y mask)
17931801
(VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask)
@@ -2018,6 +2026,7 @@
20182026
(VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) => (VPMOVDWMasked256Merging dst x mask)
20192027
(VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512Merging dst x mask)
20202028
(VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask)
2029+
(VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512Merging dst x mask)
20212030
(VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask)
20222031
(VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask)
20232032
(VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask)
@@ -2071,6 +2080,7 @@
20712080
(VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512Merging dst x mask)
20722081
(VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask)
20732082
(VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512Merging dst x mask)
2083+
(VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512Merging dst x mask)
20742084
(VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask)
20752085
(VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512Merging dst x mask)
20762086
(VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask)
@@ -2235,9 +2245,12 @@
22352245
(VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
22362246
(VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
22372247
(VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
2248+
(VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
22382249
(VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
2250+
(VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
22392251
(VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
22402252
(VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
2253+
(VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
22412254
(VPBLENDVB128 dst (VPMOVWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
22422255
(VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
22432256
(VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
@@ -2396,9 +2409,12 @@
23962409
(VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
23972410
(VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
23982411
(VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
2412+
(VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
23992413
(VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
2414+
(VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
24002415
(VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
24012416
(VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
2417+
(VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
24022418
(VPBLENDVB256 dst (VPMOVWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
24032419
(VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
24042420
(VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))

src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/_gen/simdgenericOps.go

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)