diff --git a/experimental/iterators/test/Integration/Dialect/Iterators/CPU/accumulate.mlir b/experimental/iterators/test/Integration/Dialect/Iterators/CPU/accumulate.mlir
index 7cf0fff5aec6..9a54ec02dcff 100644
--- a/experimental/iterators/test/Integration/Dialect/Iterators/CPU/accumulate.mlir
+++ b/experimental/iterators/test/Integration/Dialect/Iterators/CPU/accumulate.mlir
@@ -2,9 +2,20 @@
 // RUN:   -convert-iterators-to-llvm \
 // RUN:   -decompose-iterator-states \
 // RUN:   -decompose-tuples \
+// RUN:   -inline -canonicalize \
+// RUN:   -one-shot-bufferize="allow-return-allocs" \
+// RUN:   -buffer-hoisting \
+// RUN:   -buffer-deallocation \
+// RUN:   -convert-bufferization-to-memref \
+// RUN:   -expand-strided-metadata \
+// RUN:   -finalize-memref-to-llvm \
+// RUN:   -convert-scf-to-cf \
 // RUN:   -convert-func-to-llvm \
-// RUN:   -convert-scf-to-cf -convert-cf-to-llvm \
-// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
+// RUN:   -canonicalize \
+// RUN:   -convert-cf-to-llvm \
+// RUN: | mlir-cpu-runner \
+// RUN:   -e main -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s
 
 func.func private @accumulate_sum_tuple(
@@ -72,8 +83,57 @@ func.func @test_accumulate_avg_tuple() {
   return
 }
 
+func.func private @unpack_i32(%input : tuple<i32>) -> i32 {
+  %elem = tuple.to_elements %input : tuple<i32>  // Unwraps the single field of the one-element tuple.
+  return %elem : i32
+}
+
+func.func private @accumulate_histogram(
+      %hist : tensor<4xi32>, %val : i32) -> tensor<4xi32> {
+  %inc = arith.constant 1 : i32  // Each incoming value bumps exactly one bucket by one.
+  %bucket = arith.index_cast %val : i32 to index  // No range check here; an index outside the 4 buckets is out of bounds.
+  %count = tensor.extract %hist[%bucket] : tensor<4xi32>
+  %bumped = arith.addi %count, %inc : i32
+  %updated = tensor.insert %bumped into %hist[%bucket] : tensor<4xi32>  // Result is a new tensor value carrying the updated bucket.
+  return %updated : tensor<4xi32>
+}
+
+func.func private @tensor_to_struct(%input : tensor<4xi32>) -> tuple<i32, i32, i32, i32> {
+  %c0 = arith.constant 0 : index  // Each of the four elements is read at a constant index, in order.
+  %e0 = tensor.extract %input[%c0] : tensor<4xi32>
+  %c1 = arith.constant 1 : index
+  %e1 = tensor.extract %input[%c1] : tensor<4xi32>
+  %c2 = arith.constant 2 : index
+  %e2 = tensor.extract %input[%c2] : tensor<4xi32>
+  %c3 = arith.constant 3 : index
+  %e3 = tensor.extract %input[%c3] : tensor<4xi32>
+  %packed = tuple.from_elements %e0, %e1, %e2, %e3 : tuple<i32, i32, i32, i32>  // Repack as a 4-field tuple, element i -> field i.
+  return %packed : tuple<i32, i32, i32, i32>
+}
+
+// CHECK-LABEL: test_accumulate_histogram
+// CHECK-NEXT:  (1, 2, 1, 0)
+// CHECK-NEXT:  -
+func.func @test_accumulate_histogram() {
+  iterators.print("test_accumulate_histogram")  // Emits the label line the expectations above anchor on.
+  %values = "iterators.constantstream"()
+      { value = [[0 : i32], [1 : i32], [1 : i32], [2 : i32]] }
+      : () -> (!iterators.stream<tuple<i32>>)
+  %scalars = "iterators.map"(%values) {mapFuncRef = @unpack_i32}
+    : (!iterators.stream<tuple<i32>>) -> (!iterators.stream<i32>)
+  %zeros = arith.constant dense<[0, 0, 0, 0]> : tensor<4xi32>  // Initial histogram: all four buckets start at zero.
+  %histogram = iterators.accumulate(%scalars, %zeros)
+    with @accumulate_histogram
+      : (!iterators.stream<i32>) -> !iterators.stream<tensor<4xi32>>
+  %as_tuple = "iterators.map"(%histogram) {mapFuncRef = @tensor_to_struct}
+    : (!iterators.stream<tensor<4xi32>>) -> (!iterators.stream<tuple<i32, i32, i32, i32>>)
+  "iterators.sink"(%as_tuple) : (!iterators.stream<tuple<i32, i32, i32, i32>>) -> ()  // Consumes the tuple stream; presumably this is what prints the expected lines (confirm against the sink lowering).
+  return
+}
+
 func.func @main() {
   call @test_accumulate_sum_tuple() : () -> ()
   call @test_accumulate_avg_tuple() : () -> ()
+  call @test_accumulate_histogram() : () -> ()  // Runs last, so its output follows the two tuple tests in the checked stream.
   return
 }