From cddd895748542f3d393082a8a9094e2ab695eb20 Mon Sep 17 00:00:00 2001
From: Thomas Benz
Date: Tue, 31 Oct 2023 15:51:12 +0100
Subject: [PATCH] frontend: Fetch *inst64* updates from Occamy

---
Review notes (this section is ignored by `git am`):
- axi_dma_perf_counters: `EnablePerfCounters` is a localparam fixed at 0, so the
  generate-if that now wraps the `FF` and `assign dma_perf_o` is never elaborated.
  An else branch (`gen_no_perf_counters`) was added that ties `dma_perf_q` and
  `dma_perf_o` to '0 so the output port is always driven.
- The last hunk header of axi_dma_perf_counters.sv and the diffstat were updated
  for the four added lines; the post-image blob id on the `index` line is stale.
- NOTE(review): the "buffer" stall branches raise `dma_events.w_stall` /
  `dma_events.r_stall`, the same flags as the channel-stall branches. Confirm
  whether `dma_events_t` has dedicated buffer-stall fields that should be used.
- Indentation and blank-line placement were reconstructed from a copy whose line
  breaks had been collapsed; regenerate against the tree before applying.

 src/frontend/inst64/axi_dma_perf_counters.sv | 61 +++++++++++++++++---
 src/frontend/inst64/axi_dma_tc_snitch_fe.sv  | 17 ++++--
 2 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/src/frontend/inst64/axi_dma_perf_counters.sv b/src/frontend/inst64/axi_dma_perf_counters.sv
index 00eeb101..13ac6a93 100644
--- a/src/frontend/inst64/axi_dma_perf_counters.sv
+++ b/src/frontend/inst64/axi_dma_perf_counters.sv
@@ -7,12 +7,14 @@
 
 `include "common_cells/registers.svh"
 
-/// Sample implementation of performance counters.
+// Sample implementation of performance counters.
 module axi_dma_perf_counters #(
     parameter int unsigned TRANSFER_ID_WIDTH = -1,
     parameter int unsigned DATA_WIDTH = -1,
     parameter type axi_req_t = logic,
-    parameter type axi_res_t = logic
+    parameter type axi_res_t = logic,
+    parameter type dma_events_t = logic,
+    localparam bit EnablePerfCounters = 0
 ) (
     input logic clk_i,
     input logic rst_ni,
@@ -25,7 +27,8 @@ module axi_dma_perf_counters #(
     // DMA busy
     input logic dma_busy_i,
     // performance bus
-    output axi_dma_pkg::dma_perf_t dma_perf_o
+    output axi_dma_pkg::dma_perf_t dma_perf_o,
+    output dma_events_t dma_events_o
 );
 
     localparam int unsigned StrbWidth = DATA_WIDTH / 8;
@@ -33,6 +36,9 @@ module axi_dma_perf_counters #(
     // internal state
     axi_dma_pkg::dma_perf_t dma_perf_d, dma_perf_q;
 
+    // Event counter
+    dma_events_t dma_events;
+
     // need popcount common cell to get the number of bytes active in the strobe signal
     logic [$clog2(StrbWidth)+1-1:0] num_bytes_written;
     popcount #(
@@ -45,101 +51,129 @@ module axi_dma_perf_counters #(
 
     // see if counters should be increased
     always_comb begin : proc_next_perf_state
 
-        // defualt: keep old value
+        // default: keep old value
         dma_perf_d = dma_perf_q;
+        dma_events = '0;
 
         // aw
         if ( axi_dma_req_i.aw_valid) begin
             dma_perf_d.aw_valid_cnt = dma_perf_q.aw_valid_cnt + 'h1;
+            dma_events.aw_valid = 1'b1;
         end
         if ( axi_dma_res_i.aw_ready) begin
             dma_perf_d.aw_ready_cnt = dma_perf_q.aw_ready_cnt + 'h1;
+            dma_events.aw_ready = 1'b1;
         end
         if ( axi_dma_res_i.aw_ready && axi_dma_req_i.aw_valid) begin
             dma_perf_d.aw_done_cnt = dma_perf_q.aw_done_cnt + 'h1;
+            dma_events.aw_done = 1'b1;
         end
         if ( axi_dma_res_i.aw_ready && axi_dma_req_i.aw_valid) begin
             dma_perf_d.aw_bw = dma_perf_q.aw_bw +
                                ((axi_dma_req_i.aw.len + 1) << axi_dma_req_i.aw.size);
+            dma_events.aw_len = axi_dma_req_i.aw.len;
+            dma_events.aw_size = axi_dma_req_i.aw.size;
         end
 
         if (!axi_dma_res_i.aw_ready && axi_dma_req_i.aw_valid) begin
             dma_perf_d.aw_stall_cnt = dma_perf_q.aw_stall_cnt + 'h1;
+            dma_events.aw_stall = 1'b1;
         end
 
         // ar
         if (axi_dma_req_i.ar_valid) begin
             dma_perf_d.ar_valid_cnt = dma_perf_q.ar_valid_cnt + 'h1;
+            dma_events.ar_valid = 1'b1;
         end
         if (axi_dma_res_i.ar_ready) begin
             dma_perf_d.ar_ready_cnt = dma_perf_q.ar_ready_cnt + 'h1;
+            dma_events.ar_ready = 1'b1;
         end
         if (axi_dma_res_i.ar_ready && axi_dma_req_i.ar_valid) begin
             dma_perf_d.ar_done_cnt = dma_perf_q.ar_done_cnt + 'h1;
+            dma_events.ar_done = 1'b1;
         end
         if (axi_dma_res_i.ar_ready && axi_dma_req_i.ar_valid) begin
             dma_perf_d.ar_bw = dma_perf_q.ar_bw +
                                ((axi_dma_req_i.ar.len + 1) << axi_dma_req_i.ar.size);
+            dma_events.ar_len = axi_dma_req_i.ar.len;
+            dma_events.ar_size = axi_dma_req_i.ar.size;
         end
 
         if (!axi_dma_res_i.ar_ready && axi_dma_req_i.ar_valid) begin
             dma_perf_d.ar_stall_cnt = dma_perf_q.ar_stall_cnt + 'h1;
+            dma_events.ar_stall = 1'b1;
         end
 
         // r
         if (axi_dma_res_i.r_valid) begin
             dma_perf_d.r_valid_cnt = dma_perf_q.r_valid_cnt + 'h1;
+            dma_events.r_valid = 1'b1;
         end
         if (axi_dma_req_i.r_ready) begin
             dma_perf_d.r_ready_cnt = dma_perf_q.r_ready_cnt + 'h1;
+            dma_events.r_ready = 1'b1;
         end
         if (axi_dma_req_i.r_ready && axi_dma_res_i.r_valid) begin
             dma_perf_d.r_done_cnt = dma_perf_q.r_done_cnt + 'h1;
+            dma_events.r_done = 1'b1;
         end
         if (axi_dma_req_i.r_ready && axi_dma_res_i.r_valid) begin
             dma_perf_d.r_bw = dma_perf_q.r_bw + DATA_WIDTH / 8;
+            dma_events.r_bw = 1'b1;
         end
 
         if (axi_dma_req_i.r_ready && !axi_dma_res_i.r_valid) begin
             dma_perf_d.r_stall_cnt = dma_perf_q.r_stall_cnt + 'h1;
+            dma_events.r_stall = 1'b1;
         end
 
         // w
         if (axi_dma_req_i.w_valid) begin
             dma_perf_d.w_valid_cnt = dma_perf_q.w_valid_cnt + 'h1;
+            dma_events.w_valid = 1'b1;
         end
         if (axi_dma_res_i.w_ready) begin
             dma_perf_d.w_ready_cnt = dma_perf_q.w_ready_cnt + 'h1;
+            dma_events.w_ready = 1'b1;
         end
         if (axi_dma_res_i.w_ready && axi_dma_req_i.w_valid) begin
             dma_perf_d.w_done_cnt = dma_perf_q.w_done_cnt + 'h1;
+            dma_events.w_done = 1'b1;
         end
         if (axi_dma_res_i.w_ready && axi_dma_req_i.w_valid) begin
             dma_perf_d.w_bw = dma_perf_q.w_bw + num_bytes_written;
+            dma_events.num_bytes_written = num_bytes_written;
         end
 
         if (!axi_dma_res_i.w_ready && axi_dma_req_i.w_valid) begin
             dma_perf_d.w_stall_cnt = dma_perf_q.w_stall_cnt + 'h1;
+            dma_events.w_stall = 1'b1;
         end
 
         // b
         if (axi_dma_res_i.b_valid) begin
             dma_perf_d.b_valid_cnt = dma_perf_q.b_valid_cnt + 'h1;
+            dma_events.b_valid = 1'b1;
         end
         if (axi_dma_req_i.b_ready) begin
             dma_perf_d.b_ready_cnt = dma_perf_q.b_ready_cnt + 'h1;
+            dma_events.b_ready = 1'b1;
         end
         if (axi_dma_req_i.b_ready && axi_dma_res_i.b_valid) begin
             dma_perf_d.b_done_cnt = dma_perf_q.b_done_cnt + 'h1;
+            dma_events.b_done = 1'b1;
         end
 
         // buffer
         if ( axi_dma_res_i.w_ready && !axi_dma_req_i.w_valid) begin
             dma_perf_d.buf_w_stall_cnt = dma_perf_q.buf_w_stall_cnt + 'h1;
+            dma_events.w_stall = 1'b1;
         end
         if (!axi_dma_req_i.r_ready && axi_dma_res_i.r_valid) begin
             dma_perf_d.buf_r_stall_cnt = dma_perf_q.buf_r_stall_cnt + 'h1;
+            dma_events.r_stall = 1'b1;
         end
 
         // ids
@@ -147,10 +181,23 @@ module axi_dma_perf_counters #(
         dma_perf_d.completed_id = 32'h0 + completed_id_i;
 
         // busy
-        if (dma_busy_i) dma_perf_d.dma_busy_cnt = dma_perf_q.dma_busy_cnt + 'h1;
+        if (dma_busy_i) begin
+            dma_perf_d.dma_busy_cnt = dma_perf_q.dma_busy_cnt + 'h1;
+            dma_events.dma_busy = 1'b1;
+        end
+    end
+
+    assign dma_events_o = dma_events;
+
+    if (EnablePerfCounters) begin : gen_perf_counters
+        `FF(dma_perf_q, dma_perf_d, 0);
+        assign dma_perf_o = dma_perf_q;
+    end else begin : gen_no_perf_counters
+        // Counters disabled: tie off state and output so neither is left undriven.
+        assign dma_perf_q = '0;
+        assign dma_perf_o = '0;
     end
 
-    `FF(dma_perf_q, dma_perf_d, 0);
-    assign dma_perf_o = dma_perf_q;
+
 
 endmodule
diff --git a/src/frontend/inst64/axi_dma_tc_snitch_fe.sv b/src/frontend/inst64/axi_dma_tc_snitch_fe.sv
index 3f8db8ab..b168b43f 100644
--- a/src/frontend/inst64/axi_dma_tc_snitch_fe.sv
+++ b/src/frontend/inst64/axi_dma_tc_snitch_fe.sv
@@ -19,6 +19,7 @@ module axi_dma_tc_snitch_fe #(
     parameter type axi_req_t = logic,
     parameter type axi_res_t = logic,
     parameter type acc_resp_t = logic,
+    parameter type dma_events_t = logic,
     /// Derived parameter *Do not override*
     parameter type addr_t = logic [AddrWidth-1:0],
     parameter type data_t = logic [DataWidth-1:0]
@@ -50,7 +51,8 @@ module axi_dma_tc_snitch_fe #(
     input logic [31:0] hart_id_i,
 
     // performance output
-    output axi_dma_pkg::dma_perf_t dma_perf_o
+    output axi_dma_pkg::dma_perf_t dma_perf_o,
+    output dma_events_t dma_events_o
 );
 
     typedef logic [IdWidth-1:0] id_t;
@@ -62,6 +64,7 @@ module axi_dma_tc_snitch_fe #(
         axi_pkg::burst_t burst_src, burst_dst;
         logic decouple_rw;
         logic deburst;
+        logic serialize;
     } burst_req_t;
 
     typedef struct packed {
@@ -137,8 +140,8 @@ module axi_dma_tc_snitch_fe #(
     //--------------------------------------
     // Buffer twod last
     //--------------------------------------
-    localparam int unsigned TwodBufferDepth = 2 * DMAReqFifoDepth +
-                                              DMAAxiReqFifoDepth + 3 + 1;
+    localparam int unsigned TwodBufferDepth = 2 * (DMAReqFifoDepth +
+                                              DMAAxiReqFifoDepth) + 3 + 1;
     logic twod_req_last_realigned;
     fifo_v3 # (
         .DATA_WIDTH ( 1 ),
@@ -176,7 +179,8 @@ module axi_dma_tc_snitch_fe #(
         .TRANSFER_ID_WIDTH ( 32 ),
         .DATA_WIDTH ( DMADataWidth ),
         .axi_req_t ( axi_req_t ),
-        .axi_res_t ( axi_res_t )
+        .axi_res_t ( axi_res_t ),
+        .dma_events_t ( dma_events_t )
     ) i_axi_dma_perf_counters (
         .clk_i ( clk_i ),
         .rst_ni ( rst_ni ),
@@ -185,7 +189,8 @@ module axi_dma_tc_snitch_fe #(
         .next_id_i ( next_id ),
         .completed_id_i ( completed_id ),
         .dma_busy_i ( dma_busy_o ),
-        .dma_perf_o ( dma_perf_o )
+        .dma_perf_o ( dma_perf_o ),
+        .dma_events_o ( dma_events_o )
     );
 
     //--------------------------------------
@@ -263,6 +268,7 @@ module axi_dma_tc_snitch_fe #(
         automatic logic [1:0] cfg;
 
         // Parse the transfer parameters from the register or immediate.
+        cfg = '0;
         unique casez (acc_qdata_op_i)
             riscv_instr::DMCPYI : cfg = acc_qdata_op_i[24:20];
             riscv_instr::DMCPY : cfg = acc_qdata_argb_i;
@@ -299,6 +305,7 @@ module axi_dma_tc_snitch_fe #(
         automatic logic [1:0] status;
 
         // Parse the status index from the register or immediate.
+        status = '0;
         unique casez (acc_qdata_op_i)
             riscv_instr::DMSTATI: status = acc_qdata_op_i[24:20];
             riscv_instr::DMSTAT: status = acc_qdata_argb_i;