[llvm-dev] gep and strength reduction

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[llvm-dev] gep and strength reduction

U.Mutlu via llvm-dev
Hello,

I was wondering if we had a way to strength reduce getelementptr today.

For the following code:

void func(float (&A)[132][140]) {
  for (int i = 0; i < 132; ++i)
    for (int j = 0; j < 140; ++j)
      A[i][j] += 1;
}

We will generate: (annotated with SCEV)

define void @func([132 x [140 x float]]* nocapture dereferenceable(73920)) local_unnamed_addr #0 {
  br label %2

; <label>:2:                                      ; preds = %11, %1
; loop<%2> at depth 1 with exact backedge-taken count of 131
; %12 = {1,+,1}<nuw><nsw><%2>
; %3 = {0,+,1}<nuw><nsw><%2>
  %3 = phi i64 [ 0, %1 ], [ %12, %11 ]
  br label %4

; <label>:4:                                      ; preds = %4, %2
; loop<%4> at depth 2 with exact backedge-taken count of 139
; %5 = {0,+,1}<nuw><nsw><%4>
  %5 = phi i64 [ 0, %2 ], [ %9, %4 ]
; %3 = {0,+,1}<nuw><nsw><%2>
; %6 = {{%0,+,560}<nsw><%2>,+,4}<nw><%4>
  %6 = getelementptr inbounds [132 x [140 x float]], [132 x [140 x float]]* %0, i64 0, i64 %3, i64 %5
  %7 = load float, float* %6, align 4, !tbaa !2
  %8 = fadd float %7, 1.000000e+00
  store float %8, float* %6, align 4, !tbaa !2
; %9 = {1,+,1}<nuw><nsw><%4>
  %9 = add nuw nsw i64 %5, 1
  %10 = icmp eq i64 %9, 140
  br i1 %10, label %11, label %4

; <label>:11:                                     ; preds = %4
; loop<%2> at depth 1 with exact backedge-taken count of 131
; %3 = {0,+,1}<nuw><nsw><%2>
; %12 = {1,+,1}<nuw><nsw><%2>
  %12 = add nuw nsw i64 %3, 1
  %13 = icmp eq i64 %12, 132
  br i1 %13, label %14, label %2

; <label>:14:                                     ; preds = %11
  ret void
}

And we see clearly that the gep is "complicated" in the sense that it does two addition and two multiplication.

Instead we could produce the following code:

define void @func([132 x [140 x float]]* nocapture dereferenceable(73920)) local_unnamed_addr #0 {
  br label %2

; <label>:2:                                      ; preds = %12, %1
; loop<%2> at depth 1 with exact backedge-taken count of 131
; %13 = {1,+,1}<nuw><nsw><%2>
; %3 = {0,+,1}<nuw><nsw><%2>
  %3 = phi i64 [ 0, %1 ], [ %13, %12 ]
; %4 = {%0,+,560}<nsw><%2>
  %4 = getelementptr inbounds [132 x [140 x float]], [132 x [140 x float]]* %0, i64 0, i64 %3
  br label %5

; <label>:5:                                      ; preds = %5, %2
; loop<%5> at depth 2 with exact backedge-taken count of 139
; %6 = {0,+,1}<nuw><nsw><%5>
  %6 = phi i64 [ 0, %2 ], [ %10, %5 ]
; %4 = {%0,+,560}<nsw><%2>
; %7 = {{%0,+,560}<nsw><%2>,+,4}<nsw><%5>
  %7 = getelementptr inbounds [140 x float], [140 x float]* %4, i64 0, i64 %6
  %8 = load float, float* %7, align 4, !tbaa !2
  %9 = fadd float %8, 1.000000e+00
  store float %9, float* %7, align 4, !tbaa !2
; %10 = {1,+,1}<nuw><nsw><%5>
  %10 = add nuw nsw i64 %6, 1
  %11 = icmp eq i64 %10, 140
  br i1 %11, label %12, label %5

; <label>:12:                                     ; preds = %5
; loop<%2> at depth 1 with exact backedge-taken count of 131
; %3 = {0,+,1}<nuw><nsw><%2>
; %13 = {1,+,1}<nuw><nsw><%2>
  %13 = add nuw nsw i64 %3, 1
  %14 = icmp eq i64 %13, 132
  br i1 %14, label %15, label %2

; <label>:15:                                     ; preds = %12
  ret void
}

Where the gep has been strength reduced to one addition one multiplication.

Or even better (at least on the architecture I'm targeting):

define void @func([132 x [140 x float]]* nocapture dereferenceable(73920)) local_unnamed_addr #0 {
; %2 = %0
  %2 = getelementptr inbounds [132 x [140 x float]], [132 x [140 x float]]* %0, i64 0, i64 0
; %3 = (73920 + %0)<nsw>
  %3 = getelementptr inbounds [132 x [140 x float]], [132 x [140 x float]]* %0, i64 1, i64 0
  br label %4

; <label>:4:                                      ; preds = %14, %1
; loop<%4> at depth 1 with exact backedge-taken count of 131
; %2 = %0
; %15 = {(560 + %0)<nsw>,+,560}<nuw><%4>
; %5 = {%0,+,560}<nuw><%4>
  %5 = phi [140 x float]* [ %2, %1 ], [ %15, %14 ]
; %6 = {%0,+,560}<nuw><%4>
  %6 = getelementptr inbounds [140 x float], [140 x float]* %5, i64 0, i64 0
; %7 = {(560 + %0)<nsw>,+,560}<nuw><%4>
  %7 = getelementptr inbounds [140 x float], [140 x float]* %5, i64 1, i64 0
  br label %8

; <label>:8:                                      ; preds = %8, %4
; loop<%8> at depth 2 with exact backedge-taken count of 139
; %6 = {%0,+,560}<nuw><%4>
; %9 = {{%0,+,560}<nuw><%4>,+,4}<nuw><%8>
  %9 = phi float* [ %6, %4 ], [ %12, %8 ]
  %10 = load float, float* %9, align 4, !tbaa !2
  %11 = fadd float %10, 1.000000e+00
  store float %11, float* %9, align 4, !tbaa !2
; %12 = {{(4 + %0)<nsw>,+,560}<nw><%4>,+,4}<nuw><%8>
  %12 = getelementptr inbounds float, float* %9, i64 1
; %7 = {(560 + %0)<nsw>,+,560}<nuw><%4>
  %13 = icmp eq float* %12, %7
  br i1 %13, label %14, label %8

; <label>:14:                                     ; preds = %8
; loop<%4> at depth 1 with exact backedge-taken count of 131
; %5 = {%0,+,560}<nuw><%4>
; %15 = {(560 + %0)<nsw>,+,560}<nuw><%4>
  %15 = getelementptr inbounds [140 x float], [140 x float]* %5, i64 1
; %3 = (73920 + %0)<nsw>
  %16 = icmp eq [140 x float]* %15, %3
  br i1 %16, label %17, label %4

; <label>:17:                                     ; preds = %14
  ret void
}

Where we only have addition by a constant (I also got rid of the induction variables, but that's just being fancy). Do we have a way to do that automatically?

Why don't we? (I'm guessing because that's negligible on X86)

--
Alexandre Isoard

_______________________________________________
LLVM Developers mailing list
[hidden email]
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev