From 5d0a12d3e9b1606c36430cf908da20d19d101e04 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Wed, 7 Aug 2024 16:36:15 -0700 Subject: [PATCH] [msan] Precommit tests for Arm NEON vector shift (#101420) MSan currently does not correctly instrument most (all?) Arm NEON vector shift instructions. This patch shows the current state of instrumentation. A followup patch will apply handleVectorShiftIntrinsic to most of the vector shift instructions and update this test accordingly. Test forked from llvm/test/CodeGen/AArch64/arm64-vshift.ll. --- .../MemorySanitizer/AArch64/arm64-vshift.ll | 9546 +++++++++++++++++ 1 file changed, 9546 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll new file mode 100644 index 00000000000000..a755562d683fbd --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll @@ -0,0 +1,9546 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool opt --version 4 +; +; RUN: opt < %s -passes=msan -S | FileCheck %s +; +; Forked from llvm/test/CodeGen/AArch64/arm64-vshift.ll + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @sqshl8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = load <8 x i8>, ptr %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @sqshl4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = load <4 x i16>, ptr %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @sqshl2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = load <2 x i32>, ptr %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @sqshl1d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]]) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp2 = load <1 x i64>, ptr %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @sqshl1d_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @sqshl_scalar( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[TMP1]], i64 [[TMP2]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp2 = load i64, ptr %B + %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @sqshl_scalar_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @sqshl_scalar_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[TMP1]], i64 1) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + +define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @uqshl8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = load <8 x i8>, ptr %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @uqshl4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = load <4 x i16>, ptr %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @uqshl2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = load <2 x i32>, ptr %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @sqshl16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = load <16 x i8>, ptr %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sqshl8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp2 = load <8 x i16>, ptr %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sqshl4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp2 = load <4 x i32>, ptr %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @sqshl2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp2 = load <2 x i64>, ptr %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @uqshl16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = load <16 x i8>, ptr %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @uqshl8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp2 = load <8 x i16>, ptr %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @uqshl4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp2 = load <4 x i32>, ptr %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @uqshl2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp2 = load <2 x i64>, ptr %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @uqshl1d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]]) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp2 = load <1 x i64>, ptr %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @uqshl1d_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @uqshl1d_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @uqshl_scalar( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[TMP1]], i64 [[TMP2]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp2 = load i64, ptr %B + %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @uqshl_scalar_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @uqshl_scalar_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[TMP1]], i64 1) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone + + +declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @srshl8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = load <8 x i8>, ptr %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @srshl4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = load <4 x i16>, ptr %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @srshl2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = load <2 x i32>, ptr %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @srshl1d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]]) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp2 = load <1 x i64>, ptr %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @srshl1d_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @srshl1d_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @srshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @srshl_scalar( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 [[TMP2]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp2 = load i64, ptr %B + %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @srshl_scalar_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @srshl_scalar_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 1) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + +define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @urshl8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = load <8 x i8>, ptr %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @urshl4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = load <4 x i16>, ptr %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @urshl2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = load <2 x i32>, ptr %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @urshl1d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]]) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp2 = load <1 x i64>, ptr %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @urshl1d_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @urshl1d_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @urshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @urshl_scalar( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 [[TMP2]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp2 = load i64, ptr %B + %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @urshl_scalar_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @urshl_scalar_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 1) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + +define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @srshl16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = load <16 x i8>, ptr %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @srshl8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp2 = load <8 x i16>, ptr %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @srshl4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp2 = load <4 x i32>, ptr %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @srshl2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp2 = load <2 x i64>, ptr %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @urshl16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = load <16 x i8>, ptr %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @urshl8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp2 = load <8 x i16>, ptr %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @urshl4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp2 = load <4 x i32>, ptr %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @urshl2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp2 = load <2 x i64>, ptr %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone + +declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @sqrshl8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = load <8 x i8>, ptr %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @sqrshl4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = load <4 x i16>, ptr %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @sqrshl2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = load <2 x i32>, ptr %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @uqrshl8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = load <8 x i8>, ptr %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @uqrshl4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = load <4 x i16>, ptr %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @uqrshl2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = load <2 x i32>, ptr %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @sqrshl16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = load <16 x i8>, ptr %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sqrshl8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp2 = load <8 x i16>, ptr %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sqrshl4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp2 = load <4 x i32>, ptr %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @sqrshl2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp2 = load <2 x i64>, ptr %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @sqrshl1d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]]) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp2 = load <1 x i64>, ptr %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @sqrshl1d_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @sqrshl_scalar( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[TMP1]], i64 [[TMP2]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp2 = load i64, ptr %B + %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @sqrshl_scalar_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @sqrshl_scalar_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[TMP1]], i64 1) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + +define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @uqrshl16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = load <16 x i8>, ptr %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @uqrshl8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp2 = load <8 x i16>, ptr %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @uqrshl4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp2 = load <4 x i32>, ptr %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @uqrshl2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp2 = load <2 x i64>, ptr %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @uqrshl1d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]]) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp2 = load <1 x i64>, ptr %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @uqrshl1d_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @uqrshl_scalar( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[TMP1]], i64 [[TMP2]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp2 = load i64, ptr %B + %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2) + ret i64 %tmp3 +} + +define i64 @uqrshl_scalar_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @uqrshl_scalar_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[TMP1]], i64 1) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone + +declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @urshr8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @urshr8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @urshr4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @urshr4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @urshr2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @urshr2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @urshr16b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @urshr16b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @urshr8h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @urshr8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @urshr4s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @urshr4s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @urshr2d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @urshr2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @urshr1d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @urshr1d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @urshr_scalar(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @urshr_scalar( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1) + ret i64 %tmp3 +} + +define <8 x i8> @srshr8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @srshr8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @srshr4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @srshr4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @srshr2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @srshr2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @srshr16b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @srshr16b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @srshr8h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @srshr8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @srshr4s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @srshr4s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @srshr2d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @srshr2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @srshr1d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @srshr1d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @srshr_scalar(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @srshr_scalar( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1) + ret i64 %tmp3 +} + +define <8 x i8> @sqshlu8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @sqshlu8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> ) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sqshlu4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @sqshlu4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> ) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sqshlu2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @sqshlu2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> ) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @sqshlu16b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @sqshlu16b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> ) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @sqshlu8h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sqshlu8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> ) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @sqshlu4s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sqshlu4s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @sqshlu2d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @sqshlu2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> ) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @sqshlu1d_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %tmp1, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @sqshlu_i64_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @sqshlu_i64_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 [[TMP1]], i64 1) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %tmp1, i64 1) + ret i64 %tmp3 +} + +define i32 @sqshlu_i32_constant(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i32 @sqshlu_i32_constant( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 [[TMP1]], i32 1) +; CHECK-NEXT: store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %tmp1 = load i32, ptr %A + %tmp3 = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %tmp1, i32 1) + ret i32 %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone +declare i32 @llvm.aarch64.neon.sqshlu.i32(i32, i32) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @rshrn8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @rshrn8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @rshrn4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @rshrn4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @rshrn2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @rshrn2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @rshrn16b( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP4]] +; + %out = load <8 x i8>, ptr %ret + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @rshrn8h( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP4]] +; + %out = load <4 x i16>, ptr %ret + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @rshrn4s( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP4]] +; + %out = load <2 x i32>, ptr %ret + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> + ret <4 x i32> %tmp4 +} + +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone + +define <8 x i8> @shrn8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @shrn8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp2 = lshr <8 x i16> %tmp1, + %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> + ret <8 x i8> %tmp3 +} + +define <4 x i16> @shrn4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @shrn4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp2 = lshr <4 x i32> %tmp1, + %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> + ret <4 x i16> %tmp3 +} + +define <2 x i32> @shrn2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @shrn2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp2 = lshr <2 x i64> %tmp1, + %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> + ret <2 x i32> %tmp3 +} + +define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @shrn16b( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD1]], +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP14]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSPROP]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> +; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP4]] +; + %out = load <8 x i8>, ptr %ret + %tmp1 = load <8 x i16>, ptr %A + %tmp2 = lshr <8 x i16> %tmp1, + %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8> + %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @shrn8h( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD1]], +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSPROP]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> +; CHECK-NEXT: store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP4]] +; + %out = load <4 x i16>, ptr %ret + %tmp1 = load <4 x i32>, ptr %A + %tmp2 = lshr <4 x i32> %tmp1, + %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16> + %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @shrn4s( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD1]], +; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP14]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSPROP]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP4]] +; + %out = load <2 x i32>, ptr %ret + %tmp1 = load <2 x i64>, ptr %A + %tmp2 = lshr <2 x i64> %tmp1, + %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> + %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> + ret <4 x i32> %tmp4 +} + +declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone + +define i32 @sqshrn1s(i64 %A) nounwind sanitize_memory { +; CHECK-LABEL: define i32 @sqshrn1s( +; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 [[A]], i32 1) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP]] +; + %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1) + ret i32 %tmp +} + +define <8 x i8> @sqshrn8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @sqshrn8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sqshrn4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @sqshrn4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sqshrn2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @sqshrn2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) + ret <2 x i32> %tmp3 +} + + +define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @sqshrn16b( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP4]] +; + %out = load <8 x i8>, ptr %ret + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sqshrn8h( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP4]] +; + %out = load <4 x i16>, ptr %ret + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sqshrn4s( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP4]] +; + %out = load <2 x i32>, ptr %ret + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> + ret <4 x i32> %tmp4 +} + +declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone + +define i32 @sqshrun1s(i64 %A) nounwind sanitize_memory { +; CHECK-LABEL: define i32 @sqshrun1s( +; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 [[A]], i32 1) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP]] +; + %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1) + ret i32 %tmp +} + +define <8 x i8> @sqshrun8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @sqshrun8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sqshrun4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @sqshrun4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sqshrun2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @sqshrun2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @sqshrun16b( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP4]] +; + %out = load <8 x i8>, ptr %ret + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) + %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sqshrun8h( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP4]] +; + %out = load <4 x i16>, ptr %ret + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) + %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sqshrun4s( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP4]] +; + %out = load <2 x i32>, ptr %ret + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) + %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> + ret <4 x i32> %tmp4 +} + +declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone + +define i32 @sqrshrn1s(i64 %A) nounwind sanitize_memory { +; CHECK-LABEL: define i32 @sqrshrn1s( +; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 [[A]], i32 1) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP]] +; + %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1) + ret i32 %tmp +} + +define <8 x i8> @sqrshrn8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @sqrshrn8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sqrshrn4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @sqrshrn4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sqrshrn2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @sqrshrn2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @sqrshrn16b( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP4]] +; + %out = load <8 x i8>, ptr %ret + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sqrshrn8h( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP4]] +; + %out = load <4 x i16>, ptr %ret + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sqrshrn4s( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP4]] +; + %out = load <2 x i32>, ptr %ret + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> + ret <4 x i32> %tmp4 +} + +declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone + +define i32 @sqrshrun1s(i64 %A) nounwind sanitize_memory { +; CHECK-LABEL: define i32 @sqrshrun1s( +; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 [[A]], i32 1) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP]] +; + %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1) + ret i32 %tmp +} + +define <8 x i8> @sqrshrun8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @sqrshrun8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sqrshrun4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @sqrshrun4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sqrshrun2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @sqrshrun2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @sqrshrun16b( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP4]] +; + %out = load <8 x i8>, ptr %ret + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) + %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sqrshrun8h( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP4]] +; + %out = load <4 x i16>, ptr %ret + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) + %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sqrshrun4s( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP4]] +; + %out = load <2 x i32>, ptr %ret + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) + %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> + ret <4 x i32> %tmp4 +} + +declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone + +define i32 @uqrshrn1s(i64 %A) nounwind sanitize_memory { +; CHECK-LABEL: define i32 @uqrshrn1s( +; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 [[A]], i32 1) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP]] +; + %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1) + ret i32 %tmp +} + +define <8 x i8> @uqrshrn8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @uqrshrn8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @uqrshrn4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @uqrshrn4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @uqrshrn2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @uqrshrn2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @uqrshrn16b( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP4]] +; + %out = load <8 x i8>, ptr %ret + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @uqrshrn8h( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP4]] +; + %out = load <4 x i16>, ptr %ret + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @uqrshrn4s( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP4]] +; + %out = load <2 x i32>, ptr %ret + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> + ret <4 x i32> %tmp4 +} + +declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone + +define i32 @uqshrn1s(i64 %A) nounwind sanitize_memory { +; CHECK-LABEL: define i32 @uqshrn1s( +; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 [[A]], i32 1) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP]] +; + %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1) + ret i32 %tmp +} + +define <8 x i8> @uqshrn8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @uqshrn8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @uqshrn4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @uqshrn4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @uqshrn2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @uqshrn2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @uqshrn16b( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP4]] +; + %out = load <8 x i8>, ptr %ret + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) + %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> + ret <16 x i8> %tmp4 +} + +define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @uqshrn8h( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP4]] +; + %out = load <4 x i16>, ptr %ret + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) + %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> + ret <8 x i16> %tmp4 +} + +define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @uqshrn4s( +; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[TMP1]], i32 1) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP4]] +; + %out = load <2 x i32>, ptr %ret + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) + %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> + ret <4 x i32> %tmp4 +} + +declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone + +define <8 x i16> @ushll8h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @ushll8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp3 = shl <8 x i16> %tmp2, + ret <8 x i16> %tmp3 +} + +define <4 x i32> @ushll4s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @ushll4s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP]], +; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp3 = shl <4 x i32> %tmp2, + ret <4 x i32> %tmp3 +} + +define <2 x i64> @ushll2d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @ushll2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], +; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp3 = shl <2 x i64> %tmp2, + ret <2 x i64> %tmp3 +} + +define <8 x i16> @ushll2_8h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @ushll2_8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[LOAD1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> , <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %load1 = load <16 x i8>, ptr %A + %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> + %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp3 = shl <8 x i16> %tmp2, + ret <8 x i16> %tmp3 +} + +define <4 x i32> @ushll2_4s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @ushll2_4s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> , <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], +; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %load1 = load <8 x i16>, ptr %A + %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> + %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp3 = shl <4 x i32> %tmp2, + ret <4 x i32> %tmp3 +} + +define <2 x i64> @ushll2_2d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @ushll2_2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <2 x i32> [[_MSPROP]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], +; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %load1 = load <4 x i32>, ptr %A + %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> + %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp3 = shl <2 x i64> %tmp2, + ret <2 x i64> %tmp3 +} + +declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>) +declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>) +declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64) + +define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @neon.ushll8h_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> ) + ret <8 x i16> %tmp3 +} + +define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @neon.ushl8h_no_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSPROP]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> [[TMP2]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> + %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i8> [[_MSLD]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i8>, ptr %A + %tmp2 = zext <4 x i8> %tmp1 to <4 x i32> + %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @neon.ushl8_noext_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +; FIXME: unnecessary ushll.4s v0, v0, #0? +define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @neon.ushll4s_neg_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> + %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +; FIXME: should be constant folded. +define <4 x i32> @neon.ushll4s_constant_fold() nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_fold( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> , <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @neon.ushll2d_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[TMP2]], <2 x i64> ) +; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> + %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> ) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @neon.ushl_vscalar_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <1 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[TMP2]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i32>, ptr %A + %tmp2 = zext <1 x i32> %tmp1 to <1 x i64> + %tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @neon.ushl_scalar_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 [[TMP2]], i64 1) +; CHECK-NEXT: store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i32, ptr %A + %tmp2 = zext i32 %tmp1 to i64 + %tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1) + ret i64 %tmp3 +} + +define <8 x i16> @sshll8h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sshll8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp3 = shl <8 x i16> %tmp2, + ret <8 x i16> %tmp3 +} + +define <2 x i64> @sshll2d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @sshll2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], +; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp3 = shl <2 x i64> %tmp2, + ret <2 x i64> %tmp3 +} + +declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>) +declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>) +declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>) +declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64) + +define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @neon.sshl16b_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP2]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + ret <16 x i8> %tmp2 +} + +define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @neon.sshl16b_non_splat_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP2]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + ret <16 x i8> %tmp2 +} + +define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @neon.sshl16b_neg_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP2]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @neon.sshll8h_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[TMP2]], <8 x i16> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> ) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i8> [[_MSLD]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i8>, ptr %A + %tmp2 = sext <4 x i8> %tmp1 to <4 x i32> + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @neon.sshll4s_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @neon.sshll4s_neg_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +; FIXME: should be constant folded. +define <4 x i32> @neon.sshl4s_constant_fold() nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @neon.sshl4s_constant_fold( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> , <4 x i32> ) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> , <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @neon.sshl4s_no_fold( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @neon.sshll2d_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> ) +; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> ) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @neon.sshll_vscalar_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <1 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[TMP2]], <1 x i64> ) +; CHECK-NEXT: store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i32>, ptr %A + %tmp2 = zext <1 x i32> %tmp1 to <1 x i64> + %tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> ) + ret <1 x i64> %tmp3 +} + +define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @neon.sshll_scalar_constant_shift( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[TMP2]], i64 1) +; CHECK-NEXT: store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i32, ptr %A + %tmp2 = zext i32 %tmp1 to i64 + %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1) + ret i64 %tmp3 +} + +define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @neon.sshll_scalar_constant_shift_m1( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[TMP2]], i64 -1) +; CHECK-NEXT: store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; + %tmp1 = load i32, ptr %A + %tmp2 = zext i32 %tmp1 to i64 + %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 -1) + ret i64 %tmp3 +} + +; FIXME: should be constant folded. +define <2 x i64> @neon.sshl2d_constant_fold() nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @neon.sshl2d_constant_fold( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> , <2 x i64> ) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> , <2 x i64> ) + ret <2 x i64> %tmp3 +} + +define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @neon.sshl2d_no_fold( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> ) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp2 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> ) + ret <2 x i64> %tmp3 +} + +define <8 x i16> @sshll2_8h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sshll2_8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[LOAD1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> , <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], +; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %load1 = load <16 x i8>, ptr %A + %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> + %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> + %tmp3 = shl <8 x i16> %tmp2, + ret <8 x i16> %tmp3 +} + +define <4 x i32> @sshll2_4s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sshll2_4s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> , <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <4 x i16> [[_MSPROP]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], +; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %load1 = load <8 x i16>, ptr %A + %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> + %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> + %tmp3 = shl <4 x i32> %tmp2, + ret <4 x i32> %tmp3 +} + +define <2 x i64> @sshll2_2d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @sshll2_2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <2 x i32> [[_MSPROP]] to <2 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], +; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], +; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %load1 = load <4 x i32>, ptr %A + %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> + %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> + %tmp3 = shl <2 x i64> %tmp2, + ret <2 x i64> %tmp3 +} + +define <8 x i8> @sqshli8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @sqshli8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sqshli4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @sqshli4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sqshli2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @sqshli2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @sqshli16b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @sqshli16b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @sqshli8h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sqshli8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @sqshli4s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sqshli4s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @sqshli2d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @sqshli2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + ret <2 x i64> %tmp3 +} + +define <8 x i8> @uqshli8b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @uqshli8b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + ret <8 x i8> %tmp3 +} + +define <8 x i8> @uqshli8b_1(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @uqshli8b_1( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @uqshli4h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @uqshli4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @uqshli2s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @uqshli2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + ret <2 x i32> %tmp3 +} + +define <16 x i8> @uqshli16b(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @uqshli16b( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @uqshli8h(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @uqshli8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @uqshli4s(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @uqshli4s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @uqshli2d(ptr %A) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @uqshli2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + ret <2 x i64> %tmp3 +} + +define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @ursra8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP5]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp4 = load <8 x i8>, ptr %B + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @ursra4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP5]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp4 = load <4 x i16>, ptr %B + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @ursra2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp4 = load <2 x i32>, ptr %B + %tmp5 = add <2 x i32> %tmp3, %tmp4 + ret <2 x i32> %tmp5 +} + +define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @ursra16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP5]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp4 = load <16 x i8>, ptr %B + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @ursra8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP5]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp4 = load <8 x i16>, ptr %B + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @ursra4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP5]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp4 = load <4 x i32>, ptr %B + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @ursra2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP5]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp4 = load <2 x i64>, ptr %B + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @ursra1d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <1 x i64> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <1 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP5]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + %tmp4 = load <1 x i64>, ptr %B + %tmp5 = add <1 x i64> %tmp3, %tmp4 + ret <1 x i64> %tmp5 +} + +define i64 @ursra_scalar(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @ursra_scalar( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1) + %tmp4 = load i64, ptr %B + %tmp5 = add i64 %tmp3, %tmp4 + ret i64 %tmp5 +} + +define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @srsra8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP5]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> ) + %tmp4 = load <8 x i8>, ptr %B + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @srsra4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP5]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> ) + %tmp4 = load <4 x i16>, ptr %B + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @srsra2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> ) + %tmp4 = load <2 x i32>, ptr %B + %tmp5 = add <2 x i32> %tmp3, %tmp4 + ret <2 x i32> %tmp5 +} + +define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @srsra16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP5]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> ) + %tmp4 = load <16 x i8>, ptr %B + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @srsra8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP5]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> ) + %tmp4 = load <8 x i16>, ptr %B + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @srsra4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP5]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> ) + %tmp4 = load <4 x i32>, ptr %B + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @srsra2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP5]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> ) + %tmp4 = load <2 x i64>, ptr %B + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @srsra1d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> ) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <1 x i64> [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <1 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP5]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> ) + %tmp4 = load <1 x i64>, ptr %B + %tmp5 = add <1 x i64> %tmp3, %tmp4 + ret <1 x i64> %tmp5 +} + +define i64 @srsra_scalar(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define i64 @srsra_scalar( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; + %tmp1 = load i64, ptr %A + %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1) + %tmp4 = load i64, ptr %B + %tmp5 = add i64 %tmp3, %tmp4 + ret i64 %tmp5 +} + +define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @usra8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP5]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp3 = lshr <8 x i8> %tmp1, + %tmp4 = load <8 x i8>, ptr %B + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @usra4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP5]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp3 = lshr <4 x i16> %tmp1, + %tmp4 = load <4 x i16>, ptr %B + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @usra2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp3 = lshr <2 x i32> %tmp1, + %tmp4 = load <2 x i32>, ptr %B + %tmp5 = add <2 x i32> %tmp3, %tmp4 + ret <2 x i32> %tmp5 +} + +define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @usra16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP5]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp3 = lshr <16 x i8> %tmp1, + %tmp4 = load <16 x i8>, ptr %B + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @usra8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP5]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = lshr <8 x i16> %tmp1, + %tmp4 = load <8 x i16>, ptr %B + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @usra4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP5]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = lshr <4 x i32> %tmp1, + %tmp4 = load <4 x i32>, ptr %B + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @usra2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP5]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = lshr <2 x i64> %tmp1, + %tmp4 = load <2 x i64>, ptr %B + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @usra1d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = lshr <1 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <1 x i64> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP5]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp3 = lshr <1 x i64> %tmp1, + %tmp4 = load <1 x i64>, ptr %B + %tmp5 = add <1 x i64> %tmp3, %tmp4 + ret <1 x i64> %tmp5 +} + +define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @ssra8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i8> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP5]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp3 = ashr <8 x i8> %tmp1, + %tmp4 = load <8 x i8>, ptr %B + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @ssra4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i16> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP5]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp3 = ashr <4 x i16> %tmp1, + %tmp4 = load <4 x i16>, ptr %B + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @ssra2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp3 = ashr <2 x i32> %tmp1, + %tmp4 = load <2 x i32>, ptr %B + %tmp5 = add <2 x i32> %tmp3, %tmp4 + ret <2 x i32> %tmp5 +} + +define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @ssra16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = ashr <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP5]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp3 = ashr <16 x i8> %tmp1, + %tmp4 = load <16 x i8>, ptr %B + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @ssra8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP5]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp3 = ashr <8 x i16> %tmp1, + %tmp4 = load <8 x i16>, ptr %B + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @ssra4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP5]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp3 = ashr <4 x i32> %tmp1, + %tmp4 = load <4 x i32>, ptr %B + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @ssra2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP9]], [[_MSLD1]] +; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP5]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp3 = ashr <2 x i64> %tmp1, + %tmp4 = load <2 x i64>, ptr %B + %tmp5 = add <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @shr_orr8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i8> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i8> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP5]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp4 = load <8 x i8>, ptr %B + %tmp3 = lshr <8 x i8> %tmp1, + %tmp5 = or <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @shr_orr4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i16> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i16> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP5]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp4 = load <4 x i16>, ptr %B + %tmp3 = lshr <4 x i16> %tmp1, + %tmp5 = or <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @shr_orr2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i32> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <2 x i32> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp4 = load <2 x i32>, ptr %B + %tmp3 = lshr <2 x i32> %tmp1, + %tmp5 = or <2 x i32> %tmp3, %tmp4 + ret <2 x i32> %tmp5 +} + +define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @shr_orr16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = lshr <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <16 x i8> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i8> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <16 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP5]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp4 = load <16 x i8>, ptr %B + %tmp3 = lshr <16 x i8> %tmp1, + %tmp5 = or <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @shr_orr8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i16> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i16> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP5]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp4 = load <8 x i16>, ptr %B + %tmp3 = lshr <8 x i16> %tmp1, + %tmp5 = or <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @shr_orr4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP5]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp4 = load <4 x i32>, ptr %B + %tmp3 = lshr <4 x i32> %tmp1, + %tmp5 = or <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @shr_orr2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i64> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <2 x i64> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i64> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP5]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp4 = load <2 x i64>, ptr %B + %tmp3 = lshr <2 x i64> %tmp1, + %tmp5 = or <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @shl_orr8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i8> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i8> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP5]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp4 = load <8 x i8>, ptr %B + %tmp3 = shl <8 x i8> %tmp1, + %tmp5 = or <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @shl_orr4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i16> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i16> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP5]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp4 = load <4 x i16>, ptr %B + %tmp3 = shl <4 x i16> %tmp1, + %tmp5 = or <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @shl_orr2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i32> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <2 x i32> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp4 = load <2 x i32>, ptr %B + %tmp3 = shl <2 x i32> %tmp1, + %tmp5 = or <2 x i32> %tmp3, %tmp4 + ret <2 x i32> %tmp5 +} + +define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @shl_orr16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = shl <16 x i8> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <16 x i8> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i8> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <16 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP5]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp4 = load <16 x i8>, ptr %B + %tmp3 = shl <16 x i8> %tmp1, + %tmp5 = or <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 +} + +define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @shl_orr8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i16> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i16> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i16> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <8 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP5]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp4 = load <8 x i16>, ptr %B + %tmp3 = shl <8 x i16> %tmp1, + %tmp5 = or <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @shl_orr4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <4 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP5]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp4 = load <4 x i32>, ptr %B + %tmp3 = shl <4 x i32> %tmp1, + %tmp5 = or <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @shl_orr2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i64> [[_MSLD]], +; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]] +; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]] +; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i64> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <2 x i64> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: store <2 x i64> [[TMP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP5]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp4 = load <2 x i64>, ptr %B + %tmp3 = shl <2 x i64> %tmp1, + %tmp5 = or <2 x i64> %tmp3, %tmp4 + ret <2 x i64> %tmp5 +} + +define <8 x i16> @shll(<8 x i8> %in) sanitize_memory { +; CHECK-LABEL: define <8 x i16> @shll( +; CHECK-SAME: <8 x i8> [[IN:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> +; CHECK-NEXT: [[EXT:%.*]] = zext <8 x i8> [[IN]] to <8 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[_MSPROP]], +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = shl <8 x i16> [[EXT]], +; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %ext = zext <8 x i8> %in to <8 x i16> + %res = shl <8 x i16> %ext, + ret <8 x i16> %res +} + +define <4 x i32> @shll_high(<8 x i16> %in) sanitize_memory { +; CHECK-LABEL: define <4 x i32> @shll_high( +; CHECK-SAME: <8 x i16> [[IN:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> , <4 x i32> +; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i16> [[IN]], <8 x i16> undef, <4 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32> +; CHECK-NEXT: [[EXT:%.*]] = zext <4 x i16> [[EXTRACT]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[_MSPROP1]], +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = shl <4 x i32> [[EXT]], +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> + %ext = zext <4 x i16> %extract to <4 x i32> + %res = shl <4 x i32> %ext, + ret <4 x i32> %res +} + +define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i8> @sli8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 1) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmp1 = load <8 x i8>, ptr %A + %tmp2 = load <8 x i8>, ptr %B + %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i16> @sli4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]], i32 1) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmp1 = load <4 x i16>, ptr %A + %tmp2 = load <4 x i16>, ptr %B + %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i32> @sli2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], i32 1) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmp1 = load <2 x i32>, ptr %A + %tmp2 = load <2 x i32>, ptr %B + %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <1 x i64> @sli1d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]], i32 1) +; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmp1 = load <1 x i64>, ptr %A + %tmp2 = load <1 x i64>, ptr %B + %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <16 x i8> @sli16b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 1) +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmp1 = load <16 x i8>, ptr %A + %tmp2 = load <16 x i8>, ptr %B + %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <8 x i16> @sli8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 1) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmp1 = load <8 x i16>, ptr %A + %tmp2 = load <8 x i16>, ptr %B + %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sli4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 1) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmp1 = load <4 x i32>, ptr %A + %tmp2 = load <4 x i32>, ptr %B + %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory { +; CHECK-LABEL: define <2 x i64> @sli2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 1) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmp1 = load <2 x i64>, ptr %A + %tmp2 = load <2 x i64>, ptr %B + %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone + +declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone + +define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) sanitize_memory { +; CHECK-LABEL: define <1 x i64> @ashr_v1i64( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = ashr <1 x i64> [[TMP1]], [[B]] +; CHECK-NEXT: [[TMP6:%.*]] = or <1 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[C:%.*]] = ashr <1 x i64> [[A]], [[B]] +; CHECK-NEXT: store <1 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[C]] +; + %c = ashr <1 x i64> %a, %b + ret <1 x i64> %c +} + +define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory { +; CHECK-LABEL: define void @sqshl_zero_shift_amount( +; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory { +; CHECK-LABEL: define void @uqshl_zero_shift_amount( +; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory { +; CHECK-LABEL: define void @srshl_zero_shift_amount( +; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory { +; CHECK-LABEL: define void @urshl_zero_shift_amount( +; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory { +; CHECK-LABEL: define void @sqshlu_zero_shift_amount( +; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory { +; CHECK-LABEL: define void @sshl_zero_shift_amount( +; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory { +; CHECK-LABEL: define void @ushl_zero_shift_amount( +; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]]) +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) + %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer) + store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8 + ret void +} + +define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) sanitize_memory { +; CHECK-LABEL: define <4 x i32> @sext_rshrn( +; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 13) +; CHECK-NEXT: [[VMOVL_I:%.*]] = sext <4 x i16> [[VRSHRN_N1]] to <4 x i32> +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[VMOVL_I]] +; +entry: + %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13) + %vmovl.i = sext <4 x i16> %vrshrn_n1 to <4 x i32> + ret <4 x i32> %vmovl.i +} + +define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) sanitize_memory { +; CHECK-LABEL: define <4 x i32> @zext_rshrn( +; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 13) +; CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[VRSHRN_N1]] to <4 x i32> +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[VMOVL_I]] +; +entry: + %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13) + %vmovl.i = zext <4 x i16> %vrshrn_n1 to <4 x i32> + ret <4 x i32> %vmovl.i +} + +define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) sanitize_memory { +; CHECK-LABEL: define <4 x i16> @mul_rshrn( +; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[_MSPROP]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[B]], i32 13) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[VRSHRN_N1]] +; +entry: + %b = add <4 x i32> %a, + %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 13) + ret <4 x i16> %vrshrn_n1 +} + +declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575} +;.