Skip to content

Commit

Permalink
frontend: Fetch *inst64* updates from Occamy
Browse files Browse the repository at this point in the history
  • Loading branch information
thommythomaso committed Nov 1, 2023
1 parent 2579bf7 commit cddd895
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 12 deletions.
57 changes: 50 additions & 7 deletions src/frontend/inst64/axi_dma_perf_counters.sv
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@

`include "common_cells/registers.svh"

/// Sample implementation of performance counters.
// Sample implementation of performance counters.
module axi_dma_perf_counters #(
parameter int unsigned TRANSFER_ID_WIDTH = -1,
parameter int unsigned DATA_WIDTH = -1,
parameter type axi_req_t = logic,
parameter type axi_res_t = logic
parameter type axi_res_t = logic,
parameter type dma_events_t = logic,
localparam bit EnablePerfCounters = 0
) (
input logic clk_i,
input logic rst_ni,
Expand All @@ -25,14 +27,18 @@ module axi_dma_perf_counters #(
// DMA busy
input logic dma_busy_i,
// performance bus
output axi_dma_pkg::dma_perf_t dma_perf_o
output axi_dma_pkg::dma_perf_t dma_perf_o,
output dma_events_t dma_events_o
);

localparam int unsigned StrbWidth = DATA_WIDTH / 8;

// internal state
axi_dma_pkg::dma_perf_t dma_perf_d, dma_perf_q;

// Event counter
dma_events_t dma_events;

// need popcount common cell to get the number of bytes active in the strobe signal
logic [$clog2(StrbWidth)+1-1:0] num_bytes_written;
popcount #(
Expand All @@ -45,112 +51,149 @@ module axi_dma_perf_counters #(
// see if counters should be increased
// Combinational next-state logic for the performance counters.
// Starting from the registered counters (dma_perf_q), it derives the next
// counter values (dma_perf_d) from the AXI request/response handshake signals
// and, in parallel, builds the dma_events struct, which is cleared to '0 at the
// top of the block and only has fields set for conditions that hold in the
// current evaluation.
always_comb begin : proc_next_perf_state

// default: counters keep their previous value, no event is flagged
dma_perf_d = dma_perf_q;
dma_events = '0;

// aw (write address channel): valid comes from the request, ready from the response
if ( axi_dma_req_i.aw_valid) begin
dma_perf_d.aw_valid_cnt = dma_perf_q.aw_valid_cnt + 'h1;
dma_events.aw_valid = 1'b1;
end

if ( axi_dma_res_i.aw_ready) begin
dma_perf_d.aw_ready_cnt = dma_perf_q.aw_ready_cnt + 'h1;
dma_events.aw_ready = 1'b1;
end

// handshake: valid and ready asserted together
if ( axi_dma_res_i.aw_ready && axi_dma_req_i.aw_valid) begin
dma_perf_d.aw_done_cnt = dma_perf_q.aw_done_cnt + 'h1;
dma_events.aw_done = 1'b1;
end

// on a handshake, accumulate (len + 1) << size into the bandwidth counter;
// the events struct forwards the raw len/size fields instead of the product
if ( axi_dma_res_i.aw_ready && axi_dma_req_i.aw_valid) begin
dma_perf_d.aw_bw =
dma_perf_q.aw_bw + ((axi_dma_req_i.aw.len + 1) << axi_dma_req_i.aw.size);
dma_events.aw_len = axi_dma_req_i.aw.len;
dma_events.aw_size = axi_dma_req_i.aw.size;
end

// stall: valid is asserted but ready is not
if (!axi_dma_res_i.aw_ready && axi_dma_req_i.aw_valid) begin
dma_perf_d.aw_stall_cnt = dma_perf_q.aw_stall_cnt + 'h1;
dma_events.aw_stall = 1'b1;
end


// ar (read address channel): same scheme as aw
if (axi_dma_req_i.ar_valid) begin
dma_perf_d.ar_valid_cnt = dma_perf_q.ar_valid_cnt + 'h1;
dma_events.ar_valid = 1'b1;
end
if (axi_dma_res_i.ar_ready) begin
dma_perf_d.ar_ready_cnt = dma_perf_q.ar_ready_cnt + 'h1;
dma_events.ar_ready = 1'b1;
end
if (axi_dma_res_i.ar_ready && axi_dma_req_i.ar_valid) begin
dma_perf_d.ar_done_cnt = dma_perf_q.ar_done_cnt + 'h1;
dma_events.ar_done = 1'b1;
end
if (axi_dma_res_i.ar_ready && axi_dma_req_i.ar_valid) begin
dma_perf_d.ar_bw =
dma_perf_q.ar_bw + ((axi_dma_req_i.ar.len + 1) << axi_dma_req_i.ar.size);
dma_events.ar_len = axi_dma_req_i.ar.len;
dma_events.ar_size = axi_dma_req_i.ar.size;
end
if (!axi_dma_res_i.ar_ready && axi_dma_req_i.ar_valid) begin
dma_perf_d.ar_stall_cnt = dma_perf_q.ar_stall_cnt + 'h1;
dma_events.ar_stall = 1'b1;
end

// r (read data channel): here valid comes from the response, ready from the request
if (axi_dma_res_i.r_valid) begin
dma_perf_d.r_valid_cnt = dma_perf_q.r_valid_cnt + 'h1;
dma_events.r_valid = 1'b1;
end
if (axi_dma_req_i.r_ready) begin
dma_perf_d.r_ready_cnt = dma_perf_q.r_ready_cnt + 'h1;
dma_events.r_ready = 1'b1;
end
if (axi_dma_req_i.r_ready && axi_dma_res_i.r_valid) begin
dma_perf_d.r_done_cnt = dma_perf_q.r_done_cnt + 'h1;
dma_events.r_done = 1'b1;
end
// every read handshake adds DATA_WIDTH / 8 to the counter; the event is a single flag
if (axi_dma_req_i.r_ready && axi_dma_res_i.r_valid) begin
dma_perf_d.r_bw = dma_perf_q.r_bw + DATA_WIDTH / 8;
dma_events.r_bw = 1'b1;
end
// r stall: ready is asserted but no valid data is offered
if (axi_dma_req_i.r_ready && !axi_dma_res_i.r_valid) begin
dma_perf_d.r_stall_cnt = dma_perf_q.r_stall_cnt + 'h1;
dma_events.r_stall = 1'b1;
end

// w (write data channel): valid comes from the request, ready from the response
if (axi_dma_req_i.w_valid) begin
dma_perf_d.w_valid_cnt = dma_perf_q.w_valid_cnt + 'h1;
dma_events.w_valid = 1'b1;
end
if (axi_dma_res_i.w_ready) begin
dma_perf_d.w_ready_cnt = dma_perf_q.w_ready_cnt + 'h1;
dma_events.w_ready = 1'b1;
end
if (axi_dma_res_i.w_ready && axi_dma_req_i.w_valid) begin
dma_perf_d.w_done_cnt = dma_perf_q.w_done_cnt + 'h1;
dma_events.w_done = 1'b1;
end
// every write handshake adds num_bytes_written (declared outside this block)
// and forwards the same value on the events struct
if (axi_dma_res_i.w_ready && axi_dma_req_i.w_valid) begin
dma_perf_d.w_bw = dma_perf_q.w_bw + num_bytes_written;
dma_events.num_bytes_written = num_bytes_written;
end
// w stall: valid is asserted but ready is not
if (!axi_dma_res_i.w_ready && axi_dma_req_i.w_valid) begin
dma_perf_d.w_stall_cnt = dma_perf_q.w_stall_cnt + 'h1;
dma_events.w_stall = 1'b1;
end

// b (write response channel): valid/ready/done only, no stall or bandwidth counter
if (axi_dma_res_i.b_valid) begin
dma_perf_d.b_valid_cnt = dma_perf_q.b_valid_cnt + 'h1;
dma_events.b_valid = 1'b1;
end
if (axi_dma_req_i.b_ready) begin
dma_perf_d.b_ready_cnt = dma_perf_q.b_ready_cnt + 'h1;
dma_events.b_ready = 1'b1;
end
if (axi_dma_req_i.b_ready && axi_dma_res_i.b_valid) begin
dma_perf_d.b_done_cnt = dma_perf_q.b_done_cnt + 'h1;
dma_events.b_done = 1'b1;
end

// buffer: the mirrored conditions of the w and r stalls above
// (w: ready without valid, r: valid without ready), counted in buf_*_stall_cnt.
// NOTE(review): the events raised here are w_stall / r_stall, i.e. the same
// fields the channel-stall conditions above set, so both causes look identical
// on dma_events while the counters keep them apart - confirm this is intended.
if ( axi_dma_res_i.w_ready && !axi_dma_req_i.w_valid) begin
dma_perf_d.buf_w_stall_cnt = dma_perf_q.buf_w_stall_cnt + 'h1;
dma_events.w_stall = 1'b1;
end
if (!axi_dma_req_i.r_ready && axi_dma_res_i.r_valid) begin
dma_perf_d.buf_r_stall_cnt = dma_perf_q.buf_r_stall_cnt + 'h1;
dma_events.r_stall = 1'b1;
end

// ids: pass the current ids through; adding 32'h0 widens the expression to 32 bit
dma_perf_d.next_id = 32'h0 + next_id_i;
dma_perf_d.completed_id = 32'h0 + completed_id_i;

// busy: increment while dma_busy_i is asserted
// NOTE(review): the single-line statement and the begin/end form below assign
// the same next value (both read dma_perf_q), so the counter still advances by
// one; the single-line form looks like a redundant leftover - confirm.
if (dma_busy_i) dma_perf_d.dma_busy_cnt = dma_perf_q.dma_busy_cnt + 'h1;
if (dma_busy_i) begin
dma_perf_d.dma_busy_cnt = dma_perf_q.dma_busy_cnt + 'h1;
dma_events.dma_busy = 1'b1;
end
end

assign dma_events_o = dma_events;

// Optional counter state: the register holding dma_perf_q and the drive of
// dma_perf_o are only generated when EnablePerfCounters is set.
// EnablePerfCounters is declared as a localparam with value 0 in the parameter
// list, so this branch cannot be enabled by an instantiating module.
// NOTE(review): the same `FF / assign pair also appears unconditionally right
// after this block in the text shown here - confirm that only one copy is meant
// to exist, and what drives dma_perf_o when this branch is not generated.
if (EnablePerfCounters) begin : gen_perf_counters
// presumably `FF(q, d, reset value) from common_cells/registers.svh - confirm
`FF(dma_perf_q, dma_perf_d, 0);
assign dma_perf_o = dma_perf_q;
end

`FF(dma_perf_q, dma_perf_d, 0);
assign dma_perf_o = dma_perf_q;


endmodule
17 changes: 12 additions & 5 deletions src/frontend/inst64/axi_dma_tc_snitch_fe.sv
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ module axi_dma_tc_snitch_fe #(
parameter type axi_req_t = logic,
parameter type axi_res_t = logic,
parameter type acc_resp_t = logic,
parameter type dma_events_t = logic,
/// Derived parameter *Do not override*
parameter type addr_t = logic [AddrWidth-1:0],
parameter type data_t = logic [DataWidth-1:0]
Expand Down Expand Up @@ -50,7 +51,8 @@ module axi_dma_tc_snitch_fe #(
input logic [31:0] hart_id_i,

// performance output
output axi_dma_pkg::dma_perf_t dma_perf_o
output axi_dma_pkg::dma_perf_t dma_perf_o,
output dma_events_t dma_events_o
);

typedef logic [IdWidth-1:0] id_t;
Expand All @@ -62,6 +64,7 @@ module axi_dma_tc_snitch_fe #(
axi_pkg::burst_t burst_src, burst_dst;
logic decouple_rw;
logic deburst;
logic serialize;
} burst_req_t;

typedef struct packed {
Expand Down Expand Up @@ -137,8 +140,8 @@ module axi_dma_tc_snitch_fe #(
//--------------------------------------
// Buffer twod last
//--------------------------------------
localparam int unsigned TwodBufferDepth = 2 * DMAReqFifoDepth +
DMAAxiReqFifoDepth + 3 + 1;
localparam int unsigned TwodBufferDepth = 2 * (DMAReqFifoDepth +
DMAAxiReqFifoDepth) + 3 + 1;
logic twod_req_last_realigned;
fifo_v3 # (
.DATA_WIDTH ( 1 ),
Expand Down Expand Up @@ -176,7 +179,8 @@ module axi_dma_tc_snitch_fe #(
.TRANSFER_ID_WIDTH ( 32 ),
.DATA_WIDTH ( DMADataWidth ),
.axi_req_t ( axi_req_t ),
.axi_res_t ( axi_res_t )
.axi_res_t ( axi_res_t ),
.dma_events_t ( dma_events_t )
) i_axi_dma_perf_counters (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
Expand All @@ -185,7 +189,8 @@ module axi_dma_tc_snitch_fe #(
.next_id_i ( next_id ),
.completed_id_i ( completed_id ),
.dma_busy_i ( dma_busy_o ),
.dma_perf_o ( dma_perf_o )
.dma_perf_o ( dma_perf_o ),
.dma_events_o ( dma_events_o )
);

//--------------------------------------
Expand Down Expand Up @@ -263,6 +268,7 @@ module axi_dma_tc_snitch_fe #(
automatic logic [1:0] cfg;

// Parse the transfer parameters from the register or immediate.
cfg = '0;
unique casez (acc_qdata_op_i)
riscv_instr::DMCPYI : cfg = acc_qdata_op_i[24:20];
riscv_instr::DMCPY : cfg = acc_qdata_argb_i;
Expand Down Expand Up @@ -299,6 +305,7 @@ module axi_dma_tc_snitch_fe #(
automatic logic [1:0] status;

// Parse the status index from the register or immediate.
status = '0;
unique casez (acc_qdata_op_i)
riscv_instr::DMSTATI: status = acc_qdata_op_i[24:20];
riscv_instr::DMSTAT: status = acc_qdata_argb_i;
Expand Down

0 comments on commit cddd895

Please sign in to comment.