Skip to content

Commit

Permalink
Merge pull request #1198 from Xilinx/tpreusse.mvu_harden
Browse files Browse the repository at this point in the history
Harden synthesis-time lane width computation against numeric overflow in exponentials.
  • Loading branch information
preusser committed Sep 26, 2024
2 parents 71b546b + 945a4a4 commit 7076ed3
Show file tree
Hide file tree
Showing 3 changed files with 362 additions and 4 deletions.
12 changes: 8 additions & 4 deletions finn-rtllib/mvu/mvu_8sx8u_dsp48.sv
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ module mvu_8sx8u_dsp48 #(
return res;
endfunction : init_leave_loads

// Bit width used to hold the sum of `n` unsigned operands of `w` bits each.
//  - w <= 16: exact width of the largest possible sum, n*(2**w - 1).
//  - w  > 16: w + $clog2(n) is returned instead, so that the exponential
//             2**w is never evaluated for wide operands.
function int unsigned sum_width(input int unsigned n, input int unsigned w);
	// Wide operands: take the additive form and stay clear of 2**w.
	if(w > 16)  return  w + $clog2(n);

	// Narrow operands: derive the width from the maximum sum itself.
	return  $clog2(1 + n*(2**w - 1));
endfunction : sum_width

// Pipeline for last indicator flag
logic [1:5] L = '0;
always_ff @(posedge clk) begin
Expand Down Expand Up @@ -445,7 +449,7 @@ module mvu_8sx8u_dsp48 #(
// Stage #4: Cross-SIMD Reduction

// Count leaves reachable from each node
localparam leave_load_t LEAVE_LOAD = SIMD > 1 ? init_leave_loads() : '{ default: 0}; // SIMD=1 requires no adder tree, so zero-ing out, otherwise init_leave_loads ends up in infinite loop
localparam leave_load_t LEAVE_LOAD = SIMD > 1 ? init_leave_loads() : '{ default: 0 }; // SIMD=1 requires no adder tree, so zero-ing out, otherwise init_leave_loads ends up in infinite loop

// Range of Cross-lane Contribution Tracked in Hi4
/*
Expand All @@ -462,7 +466,7 @@ module mvu_8sx8u_dsp48 #(
* signed value is determined by its lower bound to be at least:
* 1 + $clog2(2^(w-1)+SIMD)
*/
localparam int unsigned HI_WIDTH = 1 + $clog2(2**(ACCU_WIDTH-D[1]-1)+SIMD);
localparam int unsigned HI_WIDTH = 1 + ($clog2(SIMD) < ACCU_WIDTH-D[1]? ACCU_WIDTH-D[1] : $clog2(2**(ACCU_WIDTH-D[1]-1)+SIMD));

uwire signed [ACCU_WIDTH -1:0] up4;
uwire signed [HI_WIDTH -1:0] hi4;
Expand Down Expand Up @@ -504,12 +508,12 @@ module mvu_8sx8u_dsp48 #(
// Conclusive low part accumulation
if(i >= PE_REM) begin : blkLo
// Adder Tree across all SIMD low contributions (all unsigned arithmetic)
localparam int unsigned ROOT_WIDTH = $clog2(1 + SIMD*(2**LO_WIDTH-1));
localparam int unsigned ROOT_WIDTH = sum_width(SIMD, LO_WIDTH);
uwire [2*SIMD-2:0][ROOT_WIDTH-1:0] tree;
for(genvar s = 0; s < SIMD; s++) assign tree[SIMD-1+s] = p3[s][D[i]+:LO_WIDTH];
for(genvar n = 0; n < SIMD-1; n++) begin
// Sum truncated to actual maximum bit width at this node
localparam int unsigned NODE_WIDTH = $clog2(1 + LEAVE_LOAD[n]*(2**LO_WIDTH-1));
localparam int unsigned NODE_WIDTH = sum_width(LEAVE_LOAD[n], LO_WIDTH);
uwire [NODE_WIDTH-1:0] s = tree[2*n+1] + tree[2*n+2];
assign tree[n] = s;
end
Expand Down
192 changes: 192 additions & 0 deletions finn-rtllib/mvu/tb/mvu_accu_tb.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
9
4
d
9
2
a
d
7
9
7
b
4
4
7
0
0
c
9
9
1
9
0
a
0
5
5
7
7
2
6
7
9
0
0
9
7
7
c
7
9
7
1
2
0
f
7
1
7
f
7
1
7
1
6
6
9
e
f
e
a
6
1
7
9
d
a
7
7
f
4
7
f
9
f
9
1
9
f
7
3
4
1
1
0
d
c
d
b
9
9
f
7
0
5
e
6
7
e
7
1
7
0
e
3
c
4
9
7
9
9
d
e
c
1
f
7
0
7
1
7
d
0
7
e
a
1
9
4
b
7
9
0
a
e
6
7
2
9
0
9
0
9
1
9
0
0
7
2
7
1
5
9
1
9
6
7
c
1
9
d
9
f
c
9
9
9
b
b
9
f
9
5
1
3
0
9
0
9
2
a
9
0
f
0
7
0
a
7
3
e
5
7
162 changes: 162 additions & 0 deletions finn-rtllib/mvu/tb/mvu_accu_tb.sv
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/******************************************************************************
* Copyright (C) 2024, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @brief Testbench for MVU core compute kernel.
*****************************************************************************/

/**
 * Self-checking equivalence testbench for two `mvu_vvu_axi` instances.
 *
 * Two DUTs are generated that are parameterized identically except for
 * ACCU_WIDTH (16 vs. 32). Both receive the same random activation stream
 * and the same weight sequence loaded from "mvu_accu_tb.dat". Each DUT's
 * signed output is captured into a queue of `int`, and the queue heads are
 * popped pairwise and required to be equal. A mismatch raises an error and
 * stops the simulation.
 */
module mvu_accu_tb;

	//-----------------------------------------------------------------------
	// DUT Configuration (shared by both instances)
	localparam  IS_MVU = 1;
	localparam  COMPUTE_CORE = "mvu_8sx8u_dsp48";
	localparam  PUMPED_COMPUTE = 0;
	localparam  MW = 6;
	localparam  MH = 32;
	localparam  PE = 1;
	localparam  SIMD = 1;
	localparam  ACTIVATION_WIDTH = 8;
	localparam  WEIGHT_WIDTH = 4;
	localparam  NARROW_WEIGHTS = 1;
	localparam  SIGNED_ACTIVATIONS = 1;
	localparam  SEGMENTLEN = 1;
	localparam  FORCE_BEHAVIORAL = 0;

	// Safely deducible parameters: stream widths rounded up to whole bytes
	localparam  WEIGHT_STREAM_WIDTH_BA = (PE*SIMD*WEIGHT_WIDTH+7)/8 * 8;
	localparam  INPUT_STREAM_WIDTH_BA  = ((IS_MVU == 1 ? 1 : PE) * SIMD * ACTIVATION_WIDTH + 7) / 8 * 8;

	//-----------------------------------------------------------------------
	// Global Control: 10ns clock period, reset held for 16 rising edges
	logic  clk = 0;
	always #5ns clk = !clk;
	logic  rst = 1;
	initial begin
		repeat(16) @(posedge clk);
		rst <= 0;
	end

	// Weight memory with MH*MW entries read from a hex file
	logic [WEIGHT_WIDTH-1:0]  WeightMem[MH*MW];
	initial  $readmemh("mvu_accu_tb.dat", WeightMem);

	// Number of output pairs compared by the equivalence checker below.
	// Declared here because the end-of-test guard in the input feed reads it.
	int unsigned  NumChecked = 0;

	//-----------------------------------------------------------------------
	// Shared Input Feed
	//	The data bus is common to both DUTs; valid/ready are per DUT.
	logic [INPUT_STREAM_WIDTH_BA-1:0]  in_TDATA;
	logic  in_TVALID[2];
	uwire  in_TREADY[2];
	initial begin
		in_TDATA = 'x;
		in_TVALID = '{ default: 0 };
		@(posedge clk iff !rst);

		// NOTE(review): the iteration count 2161*MW is not explained in this
		// file - presumably 2161 input vectors of MW values each; confirm.
		repeat(2161*MW) begin
			automatic logic [ACTIVATION_WIDTH-1:0]  a = $urandom();
			in_TDATA <= a;
			in_TVALID <= '{ default: 1 };

			// Hold the value until each DUT has accepted it individually.
			// The next value is only issued once both have taken this one.
			fork
				begin
					@(posedge clk iff in_TREADY[0]);
					in_TVALID[0] <= 0;
				end
				begin
					@(posedge clk iff in_TREADY[1]);
					in_TVALID[1] <= 0;
				end
			join
		end

		// Allow MH*MW more cycles for outstanding outputs before finishing
		repeat(MH*MW) @(posedge clk);

		// Guard against a vacuous pass: the run is meaningless if the
		// checker never had a pair of outputs to compare.
		assert(NumChecked > 0) else begin
			$error("No output pairs were compared.");
		end
		$display("Test completed.");
		$finish;
	end

	//-----------------------------------------------------------------------
	// DUTs: one instance per accumulator width
	localparam int unsigned  ACCU_WIDTHS[2] = '{ 16, 32 };
	int  OutQ[2][$];	// captured outputs, one queue per DUT
	for(genvar  i = 0; i < $size(ACCU_WIDTHS); i++) begin : genDUTs
		localparam int unsigned  ACCU_WIDTH = ACCU_WIDTHS[i];
		localparam int unsigned  OUTPUT_STREAM_WIDTH_BA = (PE*ACCU_WIDTH + 7)/8 * 8;

		// Private Weight Feed: cycles through WeightMem indefinitely,
		// advancing on every rising clock edge with weights_TREADY asserted.
		logic [WEIGHT_STREAM_WIDTH_BA-1:0]  weights_TDATA;
		logic  weights_TVALID;
		uwire  weights_TREADY;
		initial begin
			weights_TDATA = 'x;
			weights_TVALID = 0;
			@(posedge clk iff !rst);

			weights_TVALID <= 1;
			forever begin
				// Loop index named distinctly from the enclosing genvar `i`
				for(int unsigned  idx = 0; idx < MH*MW; idx++) begin
					weights_TDATA <= WeightMem[idx];
					@(posedge clk iff weights_TREADY);
				end
			end
		end

		// Private Output Capture into Queue
		//	out_TDATA is declared signed, so narrower outputs are
		//	sign-extended when stored into the `int` queue.
		uwire signed [OUTPUT_STREAM_WIDTH_BA-1:0]  out_TDATA;
		uwire  out_TVALID;
		uwire  out_TREADY = !rst;
		always_ff @(posedge clk iff !rst) begin
			if(out_TVALID)  OutQ[i].push_back(out_TDATA);
		end

		// Actual DUT Instance
		mvu_vvu_axi #(
			.IS_MVU(IS_MVU), .COMPUTE_CORE(COMPUTE_CORE), .PUMPED_COMPUTE(PUMPED_COMPUTE), .MW(MW), .MH(MH), .PE(PE), .SIMD(SIMD),
			.ACTIVATION_WIDTH(ACTIVATION_WIDTH), .WEIGHT_WIDTH(WEIGHT_WIDTH), .ACCU_WIDTH(ACCU_WIDTH), .NARROW_WEIGHTS(NARROW_WEIGHTS),
			.SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .SEGMENTLEN(SEGMENTLEN), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)
		) dut (
			.ap_clk(clk),
			.ap_clk2x(1'b0),
			.ap_rst_n(!rst),
			.s_axis_weights_tdata(weights_TDATA),
			.s_axis_weights_tvalid(weights_TVALID),
			.s_axis_weights_tready(weights_TREADY),
			.s_axis_input_tdata(in_TDATA),
			.s_axis_input_tvalid(in_TVALID[i]),
			.s_axis_input_tready(in_TREADY[i]),
			.m_axis_output_tdata(out_TDATA),
			.m_axis_output_tvalid(out_TVALID),
			.m_axis_output_tready(out_TREADY)
		);
	end : genDUTs

	//-----------------------------------------------------------------------
	// Output Equivalence Checker
	//	Compares one output pair per clock cycle whenever both queues hold
	//	at least one entry.
	always_ff @(posedge clk) begin
		if(OutQ[0].size && OutQ[1].size) begin
			// Signed like the queue elements so that negative results are
			// reported as such in the mismatch message.
			automatic int  y0 = OutQ[0].pop_front();
			automatic int  y1 = OutQ[1].pop_front();
			NumChecked <= NumChecked + 1;
			assert(y0 == y1) else begin
				$error("Output Mismatch: %0d vs. %0d", y0, y1);
				$stop;
			end
		end
	end

endmodule : mvu_accu_tb

0 comments on commit 7076ed3

Please sign in to comment.