-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GVNHoist] Missed hoisting opportunity #51089
Comments
Interesting! IR reproducer on Godbolt: https://llvm.godbolt.org/z/h1Eabf5bY I think SimplifyCFG will fold both branches with the store before the branch. I think one possible concern with the store is that it may write to memory that may be visible outside the function, if the call unwinds or something. But it's marked as |
Copying the IR shared by @fhahn from godbolt ; ModuleID = '/app/example.cpp'
source_filename = "/app/example.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: mustprogress nofree nounwind willreturn uwtable
define dso_local noalias noundef ptr @_Z29FIO_createFilename_fromOutDirPKcS0_m(ptr noundef readonly %path, ptr nocapture noundef readonly %outDirName, i64 noundef %suffixLen) local_unnamed_addr #0 {
entry:
%call.i = tail call noundef ptr @strrchr(ptr noundef nonnull dereferenceable(1) %path, i32 noundef 47) #4
%cmp.i = icmp eq ptr %call.i, null
%add.ptr.i = getelementptr inbounds i8, ptr %call.i, i64 1
%retval.0.i = select i1 %cmp.i, ptr %path, ptr %add.ptr.i
%call1 = tail call i64 @strlen(ptr noundef nonnull dereferenceable(1) %outDirName) #4
%call2 = tail call i64 @strlen(ptr noundef nonnull dereferenceable(1) %retval.0.i) #4
%add3 = add i64 %suffixLen, 2
%add4 = add i64 %add3, %call1
%add5 = add i64 %add4, %call2
%call6 = tail call noalias ptr @calloc(i64 noundef 1, i64 noundef %add5) #5
tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 %call6, ptr align 1 %outDirName, i64 %call1, i1 false)
%sub = add i64 %call1, -1
%arrayidx = getelementptr inbounds i8, ptr %outDirName, i64 %sub
%0 = load i8, ptr %arrayidx, align 1, !tbaa !6
%cmp = icmp eq i8 %0, 47
%add.ptr = getelementptr inbounds i8, ptr %call6, i64 %call1
; store i8 47, ptr %add.ptr, align 1 ; HOISTS
br i1 %cmp, label %if.then10, label %if.else
if.then10: ; preds = %entry
%call12 = tail call i64 @strlen(ptr noundef nonnull dereferenceable(1) %retval.0.i) #4
tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 %add.ptr, ptr align 1 %retval.0.i, i64 %call12, i1 false)
br label %if.end19
if.else:
store i8 47, ptr %add.ptr, align 1 ; NO HOIST
%call18 = tail call i64 @strlen(ptr noundef nonnull dereferenceable(1) %retval.0.i) #4
%add.ptr17 = getelementptr inbounds i8, ptr %add.ptr, i64 1
tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %add.ptr17, ptr align 1 %retval.0.i, i64 %call18, i1 false)
br label %if.end19
if.end19: ; preds = %if.else, %if.then10
ret ptr %call6
}
; Function Attrs: inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0,1)
declare noalias noundef ptr @calloc(i64 noundef, i64 noundef) local_unnamed_addr #1
; Function Attrs: argmemonly mustprogress nofree nounwind readonly willreturn
declare i64 @strlen(ptr nocapture noundef) local_unnamed_addr #2
; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #3
; Function Attrs: argmemonly mustprogress nofree nounwind readonly willreturn
declare noundef ptr @strrchr(ptr noundef, i32 noundef) local_unnamed_addr #2
attributes #0 = { mustprogress nofree nounwind willreturn uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { inaccessiblememonly mustprogress nofree nounwind willreturn allocsize(0,1) "alloc-family"="malloc" "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { argmemonly mustprogress nofree nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #3 = { argmemonly mustprogress nofree nounwind willreturn }
attributes #4 = { nounwind readonly willreturn }
attributes #5 = { nounwind allocsize(0,1) }
!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}
!0 = !{i32 7, !"Dwarf Version", i32 4}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 7, !"PIC Level", i32 2}
!3 = !{i32 7, !"PIE Level", i32 2}
!4 = !{i32 7, !"uwtable", i32 2}
!5 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git 9a12138b5fd8c807c3b95144236c07dfc323974f)"}
!6 = !{!7, !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C++ TBAA"} $ opt -O3 -enable-gvn-hoist -stats
GVNHoist can't hoist because it can't prove that strlen can be moved before the store instruction. it is probably due to alias analysis but i haven't investigated further. |
for some reason both strlen are getting different Ranks, so likely different VN.
|
The GVN value numbering assigned a new VN to both strlen calls. (gdb) n
226 auto Entry = std::make_pair(V, InvalidVN);
(gdb) p V
$49 = 19 <--------------------------------------------------------------- %call12 got 19
(gdb) n
228 if (Call->doesNotAccessMemory())
(gdb) n
230 else if (Call->onlyReadsMemory())
(gdb)
231 VNtoCallsLoads[Entry].push_back(Call);
(gdb) p Entry
$50 = {first = 19, second = llvm::InvalidVN}
(gdb) p Call->dump()
%call12 = tail call i64 @strlen(ptr noundef nonnull dereferenceable(1) %retval.0.i) #4
$51 = void
(gdb) c
Continuing.
Breakpoint 7, llvm::GVNHoist::hoistExpressions (this=0x7fffffffa500, F=...)
at /usr/local/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp:1186
1186 CI.insert(Call, VN);
(gdb) p Call->dump()
%call18 = tail call i64 @strlen(ptr noundef nonnull dereferenceable(1) %retval.0.i) #4
$52 = void
(gdb) s
llvm::CallInfo::insert (this=0x7fffffffa138, Call=0x55555cad0740, VN=...)
at /usr/local/llvm-project/llvm/lib/Transforms/Scalar/GVNHoist.cpp:225
225 unsigned V = VN.lookupOrAdd(Call);
(gdb) n
226 auto Entry = std::make_pair(V, InvalidVN);
(gdb) p V
$53 = 23 <--------------------------------------------------------------- %call18 got 23
(gdb) p Call->dump()
%call18 = tail call i64 @strlen(ptr noundef nonnull dereferenceable(1) %retval.0.i) #4
$54 = void |
A smaller test case to show that GVN is not giving the same number to both strlen calls. It happens because of GVN.cpp:569 // We don't handle non-definitions. If we already have a call, reject
// instruction dependencies.
if (!I.getResult().isDef() || cdep != nullptr) {
cdep = nullptr; // cdep becomes nullptr becaose of Memdep analysis not sure why!
break;
} Test case ; ModuleID = 'b.ll'
source_filename = "/app/example.cpp"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: mustprogress nofree nounwind willreturn uwtable
define dso_local noalias noundef ptr @quux(ptr noundef readonly %arg, ptr nocapture noundef readonly %arg1, i64 noundef %arg2) local_unnamed_addr #0 {
bb:
%icmp = icmp eq ptr %arg1, null
%getelementptr = getelementptr inbounds i8, ptr %arg1, i64 1
%select = select i1 %icmp, ptr %arg, ptr %getelementptr
br i1 %icmp, label %bb3, label %bb4
bb3: ; preds = %bb
%call = tail call i64 @strlen(ptr noundef nonnull dereferenceable(1) %select) #2
br label %bb6
bb4: ; preds = %bb
%call5 = tail call i64 @strlen(ptr noundef nonnull dereferenceable(1) %select) #2
br label %bb6
bb6: ; preds = %bb4, %bb3
ret ptr %arg1
}
; Function Attrs: mustprogress nofree nounwind willreturn memory(argmem: read)
declare i64 @strlen(ptr nocapture noundef) local_unnamed_addr #1
; Function Attrs: mustprogress nofree nounwind willreturn memory(argmem: read)
declare noundef ptr @strrchr(ptr noundef, i32 noundef) local_unnamed_addr #1
attributes #0 = { mustprogress nofree nounwind willreturn uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { mustprogress nofree nounwind willreturn memory(argmem: read) "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { nounwind willreturn memory(read) } |
Extended Description
Issue: hoist strlen (%20, %23) before branch? GVNHoist does not help. Seems like LLVM is worried about "store i8 47, i8* %18, align 1".
With:
19: ; preds = %3
%20 = tail call i64 @strlen(i8* noundef nonnull dereferenceable(1) %7) #4
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %18, i8* align 1 %7, i64 %20, i1 false)
br label %24
21: ; preds = %3
%22 = tail call i64 @strlen(i8* noundef nonnull dereferenceable(1) %7) #4
%23 = getelementptr inbounds i8, i8* %18, i64 1
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %23, i8* align 1 %7, i64 %22, i1 false)
br label %24
24: ; preds = %21, %19
ret i8* %13
}
Hoisted fine.
Hoisting failed.
%22
The text was updated successfully, but these errors were encountered: