From 0ba9334cf923c254f292fae2c03cf63c6d30fbca Mon Sep 17 00:00:00 2001 From: Thomas Benz Date: Thu, 26 Oct 2023 16:00:08 +0200 Subject: [PATCH] frontend: Update descriptor-based frontend (#18, #26) * frontend: Create a dataflow-oriented descriptor-based iDMA frontend supporting prefetching. #18 * systems/cva6_desc: Various fixes #26 --------- Co-authored-by: Axel Vanoni Co-authored-by: iburaky-ids --- .gitignore | 1 + Bender.yml | 16 +- doc/src/frontend.rst | 4 +- doc/src/frontends/ariane_fe.rst | 8 - doc/src/frontends/descriptor_fe.rst | 17 + doc/src/frontends/register_fe.rst | 3 +- idma.mk | 5 + jobs/jobs.json | 20 + src/frontend/desc64/idma_desc64.hjson | 4 +- src/frontend/desc64/idma_desc64.sv | 537 ---------- src/frontend/desc64/idma_desc64_ar_gen.sv | 133 +++ .../desc64/idma_desc64_ar_gen_prefetch.sv | 308 ++++++ src/frontend/desc64/idma_desc64_cva6_synth.sv | 67 ++ .../desc64/idma_desc64_cva6_synth_pkg.sv | 30 + src/frontend/desc64/idma_desc64_cva6_wrap.sv | 334 ++++++ src/frontend/desc64/idma_desc64_reader.sv | 178 ++++ .../desc64/idma_desc64_reader_gater.sv | 59 ++ .../desc64/idma_desc64_reg_wrapper.sv | 44 +- src/frontend/desc64/idma_desc64_reshaper.sv | 63 ++ .../desc64/idma_desc64_shared_counter.sv | 58 -- src/frontend/desc64/idma_desc64_synth.sv | 100 +- src/frontend/desc64/idma_desc64_synth_pkg.sv | 41 +- src/frontend/desc64/idma_desc64_top.sv | 475 +++++++++ .../idma_reg_to_axi.sv} | 8 +- src/include/idma/tracer.svh | 101 ++ src/systems/cva6_desc/dma_desc_wrap.sv | 215 ---- test/frontend/tb_idma_desc64.sv | 593 ----------- test/frontend/tb_idma_desc64_bench.sv | 966 ++++++++++++++++++ test/frontend/tb_idma_desc64_top.sv | 720 +++++++++++++ util/trace_idma.py | 73 ++ 30 files changed, 3675 insertions(+), 1506 deletions(-) delete mode 100644 doc/src/frontends/ariane_fe.rst create mode 100644 doc/src/frontends/descriptor_fe.rst delete mode 100644 src/frontend/desc64/idma_desc64.sv create mode 100644 src/frontend/desc64/idma_desc64_ar_gen.sv create mode 
100644 src/frontend/desc64/idma_desc64_ar_gen_prefetch.sv create mode 100644 src/frontend/desc64/idma_desc64_cva6_synth.sv create mode 100644 src/frontend/desc64/idma_desc64_cva6_synth_pkg.sv create mode 100644 src/frontend/desc64/idma_desc64_cva6_wrap.sv create mode 100644 src/frontend/desc64/idma_desc64_reader.sv create mode 100644 src/frontend/desc64/idma_desc64_reader_gater.sv create mode 100644 src/frontend/desc64/idma_desc64_reshaper.sv delete mode 100644 src/frontend/desc64/idma_desc64_shared_counter.sv create mode 100644 src/frontend/desc64/idma_desc64_top.sv rename src/{systems/cva6_desc/dma_reg_to_axi.sv => future/idma_reg_to_axi.sv} (96%) create mode 100644 src/include/idma/tracer.svh delete mode 100644 src/systems/cva6_desc/dma_desc_wrap.sv delete mode 100644 test/frontend/tb_idma_desc64.sv create mode 100644 test/frontend/tb_idma_desc64_bench.sv create mode 100644 test/frontend/tb_idma_desc64_top.sv create mode 100644 util/trace_idma.py diff --git a/.gitignore b/.gitignore index a0238a7d..350d24bf 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ nonfree working_dir bender morty +todo diff --git a/Bender.yml b/Bender.yml index 298ad372..991b30d3 100644 --- a/Bender.yml +++ b/Bender.yml @@ -45,6 +45,7 @@ sources: - src/future/idma_improved_fifo.sv - src/future/idma_legalizer_page_splitter.sv - src/future/idma_legalizer_pow2_splitter.sv + - src/future/idma_reg_to_axi.sv # Midends - target: rtl @@ -56,22 +57,28 @@ sources: - target: rtl files: # Level 0 - - src/frontend/desc64/idma_desc64_shared_counter.sv + - src/frontend/desc64/idma_desc64_ar_gen.sv + - src/frontend/desc64/idma_desc64_ar_gen_prefetch.sv + - src/frontend/desc64/idma_desc64_reader.sv + - src/frontend/desc64/idma_desc64_reader_gater.sv + - src/frontend/desc64/idma_desc64_reshaper.sv - src/frontend/idma_transfer_id_gen.sv # Level 1 - src/frontend/desc64/idma_desc64_reg_wrapper.sv # Level 2 - - src/frontend/desc64/idma_desc64.sv - + - src/frontend/desc64/idma_desc64_top.sv + - 
src/frontend/desc64/idma_desc64_cva6_wrap.sv # Synthesis wrappers - target: synth files: # Level 0 - src/frontend/desc64/idma_desc64_synth_pkg.sv + - src/frontend/desc64/idma_desc64_cva6_synth_pkg.sv - src/synth/idma_nd_backend_synth.sv # Level 1 - src/frontend/desc64/idma_desc64_synth.sv + - src/frontend/desc64/idma_desc64_cva6_synth.sv # Testbenches - target: test @@ -79,7 +86,8 @@ sources: # Level 0 - test/tb_idma_improved_fifo.sv - test/tb_idma_nd_backend.sv - - test/frontend/tb_idma_desc64.sv + - test/frontend/tb_idma_desc64_top.sv + - test/frontend/tb_idma_desc64_bench.sv - test/future/idma_tb_per2axi.sv - test/future/idma_obi_asserter.sv - test/future/TLToAXI4.v diff --git a/doc/src/frontend.rst b/doc/src/frontend.rst index 3ec7b95e..91f10b1c 100644 --- a/doc/src/frontend.rst +++ b/doc/src/frontend.rst @@ -9,11 +9,11 @@ Currently the following three frontends are planned and in development: - :doc:`Register Frontends `: Register-based configuration interface - :doc:`Snitch `: Snitch integration -- :doc:`Ariane/Linux `: An Ariane interface to allow use in a Linux system (not a priority yet) +- :doc:`Ariane/Linux `: An Ariane interface to allow use in a Linux system (not a priority yet) .. toctree:: :hidden: frontends/register_fe.rst frontends/snitch_fe.rst - frontends/ariane_fe.rst + frontends/descriptor_fe.rst diff --git a/doc/src/frontends/ariane_fe.rst b/doc/src/frontends/ariane_fe.rst deleted file mode 100644 index e57ccc02..00000000 --- a/doc/src/frontends/ariane_fe.rst +++ /dev/null @@ -1,8 +0,0 @@ -Ariane/Linux Frontend -===================== - -Frontend for Ariane (CVA6) ready for Linux use. - -.. 
only:: html - -- `32bit 2D register frontend <../regs/idma_desc64.html>`_ diff --git a/doc/src/frontends/descriptor_fe.rst b/doc/src/frontends/descriptor_fe.rst new file mode 100644 index 00000000..09bdae0b --- /dev/null +++ b/doc/src/frontends/descriptor_fe.rst @@ -0,0 +1,17 @@ +Linux Frontend +===================== + +Frontend for Ariane (CVA6) ready for Linux use. + +.. only:: html + +- `descriptor-based frontend <../regs/idma_desc64.html>`_ + +Morty docs: + +.. only:: html + +- `Morty docs <../idma_desc64_synth/index.html>`_ + +.. image:: ../../fig/graph/idma_desc64_synth.png + :width: 600 diff --git a/doc/src/frontends/register_fe.rst b/doc/src/frontends/register_fe.rst index 92107d85..2d9a2d27 100644 --- a/doc/src/frontends/register_fe.rst +++ b/doc/src/frontends/register_fe.rst @@ -11,6 +11,5 @@ Currently supported are: .. only:: html -- `32bit 2D register frontend <../regs/idma_reg32_2d.html>`_ -- `64bit register frontend <../regs/idma_reg64.html>`_ +- `32bit 3D register frontend <../regs/idma_reg32_3d.html>`_ - `64bit 2D register frontend <../regs/idma_reg64_2d.html>`_ diff --git a/idma.mk b/idma.mk index aabdac6f..0db0a202 100644 --- a/idma.mk +++ b/idma.mk @@ -228,6 +228,11 @@ IDMA_RTL_DOC_ALL += $(IDMA_DOC_FIG_DIR)/graph/idma_nd_backend_synth.png IDMA_RTL_DOC_ALL += $(IDMA_HTML_DIR)/idma_nd_backend_synth/index.html IDMA_PICKLE_ALL += $(IDMA_PICKLE_DIR)/idma_nd_backend_synth.sv +# descriptor-based frontend +IDMA_RTL_DOC_ALL += $(IDMA_DOC_FIG_DIR)/graph/idma_desc64_synth.png +IDMA_RTL_DOC_ALL += $(IDMA_HTML_DIR)/idma_desc64_synth/index.html +IDMA_PICKLE_ALL += $(IDMA_PICKLE_DIR)/idma_desc64_synth.sv + # -------------- # QuestaSim diff --git a/jobs/jobs.json b/jobs/jobs.json index f2604a1f..4ad85c42 100644 --- a/jobs/jobs.json +++ b/jobs/jobs.json @@ -131,5 +131,25 @@ "proc_id" : "rw_axi", "testbench": "tb_idma_nd_backend", "synth_top": "idma_nd_backend_synth" + }, + "desc64": { + "jobs" : { + "simple" : "jobs.json" + }, + "params" : { + }, + "proc_id" : 
"rw_axi", + "testbench" : "tb_idma_desc64_top", + "synth_top" : "idma_desc64_synth" + }, + "desc64_dma": { + "jobs" : { + "simple" : "jobs.json" + }, + "params" : { + }, + "proc_id" : "rw_axi", + "testbench" : "tb_idma_desc64_bench", + "synth_top" : "idma_desc64_synth" } } diff --git a/src/frontend/desc64/idma_desc64.hjson b/src/frontend/desc64/idma_desc64.hjson index 44218f2a..5f2a8cdc 100644 --- a/src/frontend/desc64/idma_desc64.hjson +++ b/src/frontend/desc64/idma_desc64.hjson @@ -1,9 +1,9 @@ -// Copyright 2022 ETH Zurich and University of Bologna. +// Copyright 2023 ETH Zurich and University of Bologna. // Solderpad Hardware License, Version 0.51, see LICENSE for details. // SPDX-License-Identifier: SHL-0.51 // Authors: -// - Axel Vanoni +// - Axel Vanoni { name: idma_desc64 diff --git a/src/frontend/desc64/idma_desc64.sv b/src/frontend/desc64/idma_desc64.sv deleted file mode 100644 index cde0a4e7..00000000 --- a/src/frontend/desc64/idma_desc64.sv +++ /dev/null @@ -1,537 +0,0 @@ -// Copyright 2022 ETH Zurich and University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. -// SPDX-License-Identifier: SHL-0.51 - -// Authors: -// - Axel Vanoni - -`include "common_cells/registers.svh" - -/// This module serves as a descriptor-based frontend for the iDMA in the CVA6-core -module idma_desc64 #( - /// Width of the addresses - parameter int unsigned AddrWidth = 64 , - /// burst request type. See the documentation of the idma backend for details - parameter type burst_req_t = logic, - /// regbus interface types. 
Use the REG_BUS_TYPEDEF macros to define the types - /// or see the idma backend documentation for more details - parameter type reg_rsp_t = logic, - parameter type reg_req_t = logic, - /// Specifies the depth of the fifo behind the descriptor address register - parameter int unsigned InputFifoDepth = 8, - /// Specifies the buffer size of the fifo that tracks requests submitted to the backend - parameter int unsigned PendingFifoDepth = 8, - /// Specifies the counter width of the buffer that tracks completions delivered by the backend - parameter int unsigned TxDoneBufferWidth = 5 -)( - /// clock - input logic clk_i , - /// reset - input logic rst_ni , - - /// regbus interface - /// master pair - /// master request - output reg_req_t master_req_o , - /// master response - input reg_rsp_t master_rsp_i , - /// slave pair - /// The slave interface exposes two registers: One address register to - /// write a descriptor address to process and a status register that - /// exposes whether the DMA is busy on bit 0 and whether FIFOs are full - /// on bit 1. - /// master request - input reg_req_t slave_req_i , - /// master response - output reg_rsp_t slave_rsp_o , - - /// backend interface - /// burst request submission - /// burst request data. See iDMA backend documentation for fields - output burst_req_t dma_be_req_o , - /// valid signal for the backend data submission - output logic dma_be_valid_o , - /// ready signal for the backend data submission - input logic dma_be_ready_i , - /// status information from the backend - /// event: when a transfer has completed - input logic dma_be_tx_complete_i, - /// whether the backend is currently idle - input logic dma_be_idle_i , - - /// Event: irq - output logic irq_o -); - - import idma_desc64_reg_pkg::*; - import axi_pkg::BURST_INCR; - - // {{{ typedefs and parameters - typedef logic [AddrWidth-1:0] addr_t; - - /// Descriptor layout - typedef struct packed { - /// Flags for this request. 
Currently, the following are defined: - /// bit 0 set to trigger an irq on completion, unset to not be notified - /// bits 2:1 burst type for source, fixed: 00, incr: 01, wrap: 10 - /// bits 4:3 burst type for destination, fixed: 00, incr: 01, wrap: 10 - /// for a description of these modes, check AXI-Pulp documentation - /// bit 5 set to decouple reads and writes in the backend - /// bit 6 set to serialize requests. Not setting might violate AXI spec - /// bit 7 set to deburst (each burst is split into own transfer) - /// for a more thorough description, refer to the iDMA backend documentation - /// bits 11:8 Bitfield for AXI cache attributes for the source - /// bits 15:12 Bitfield for AXI cache attributes for the destination - /// bits of the bitfield (refer to AXI-Pulp for a description): - /// bit 0: cache bufferable - /// bit 1: cache modifiable - /// bit 2: cache read alloc - /// bit 3: cache write alloc - /// bits 23:16 AXI ID used for the transfer - /// bits 31:24 unused/reserved - logic [31:0] flags; - /// length of request in bytes - logic [31:0] length; - /// address of next descriptor, 0xFFFF_FFFF_FFFF_FFFF for last descriptor in chain - addr_t next; - /// source address to copy from - addr_t src_addr; - /// destination address to copy to - addr_t dest_addr; - } descriptor_t; - - typedef struct packed { - logic do_irq; - addr_t descriptor_addr; - } addr_irq_t; - - localparam addr_t AddressSentinel = ~'0; - - typedef enum logic [1:0] { - SubmitterIdle = '0, - SubmitterFetchDescriptor, - SubmitterSendToBE - } submitter_e; - - typedef enum logic [1:0] { - FeedbackIdle, - FeedbackWaitingOnBackend, - FeedbackUpdateMemory, - FeedbackRaiseIRQ - } feedback_fsm_e; - - // }}} typedefs and parameters - - // {{{ signal declarations - - // {{{ descriptor addr input to fifo - idma_desc64_reg2hw_t register_file_to_hw; - idma_desc64_hw2reg_t register_file_to_reg; - - addr_t desc_addr_to_input_fifo_data; - logic desc_addr_to_input_fifo_valid; - logic 
desc_addr_to_input_fifo_ready; - - addr_t desc_addr_from_input_fifo_data; - logic desc_addr_from_input_fifo_valid; - logic desc_addr_from_input_fifo_ready; - - logic [2:0] desc_addr_fifo_usage; - // }}} descriptor addr input to fifo - - // {{{ pending descriptor FIFO - addr_irq_t pending_descriptor_to_fifo_data; - logic pending_descriptor_to_fifo_valid; - logic pending_descriptor_to_fifo_ready; - - addr_irq_t pending_descriptor_from_fifo_data; - logic pending_descriptor_from_fifo_valid; - logic pending_descriptor_from_fifo_ready; - // }}} pending descriptor FIFO - - // {{{ submitter FSM - // state - submitter_e submitter_q, submitter_d; - logic [1:0] submitter_fetch_counter_q, submitter_fetch_counter_d; - // data - addr_t submitter_current_addr_q, submitter_current_addr_d; - descriptor_t submitter_current_descriptor_q, submitter_current_descriptor_d; - burst_req_t submitter_burst_req; - // register_interface master - reg_req_t submitter_master_req; - reg_rsp_t submitter_master_rsp; - // ready-valid signals - logic submitter_input_fifo_ready; - logic submitter_input_fifo_valid; - logic submitter_burst_valid_q, submitter_burst_valid_d; - logic submitter_pending_fifo_valid_q, submitter_pending_fifo_valid_d; - // }}} submitter FSM - - // {{{ instantiated modules - logic completion_counter_decrement; - logic completion_counter_has_items; - // }}} instantiated modules - - // {{{ feedback FSM - // state - feedback_fsm_e feedback_fsm_q, feedback_fsm_d; - // data - addr_irq_t feedback_addr_irq_q, feedback_addr_irq_d; - logic feedback_irq_q, feedback_irq_d; - // register_interface master - reg_req_t feedback_master_req_q, feedback_master_req_d; - reg_rsp_t feedback_master_rsp; - // ready-valid signals - logic feedback_pending_descriptor_ready_q, feedback_pending_descriptor_ready_d; - logic feedback_counter_ready_q, feedback_counter_ready_d; - // }}} feedback FSM - - // }}} signal declarations - - // {{{ combinatorial processes - - // {{{ descriptor addr input to fifo - 
assign desc_addr_to_input_fifo_data = register_file_to_hw.desc_addr.q; - // }}} descriptor addr input to fifo - - // {{{ submitter FSM - assign desc_addr_from_input_fifo_ready = submitter_q == SubmitterIdle; - assign submitter_input_fifo_valid = desc_addr_from_input_fifo_valid; - - assign pending_descriptor_to_fifo_valid = submitter_pending_fifo_valid_q; - assign submitter_master_req.addr = submitter_current_addr_q - + (submitter_fetch_counter_q << 3); - assign submitter_master_req.write = '0; - assign submitter_master_req.wdata = '0; - assign submitter_master_req.wstrb = '0; - assign submitter_master_req.valid = submitter_q == SubmitterFetchDescriptor; - - assign pending_descriptor_to_fifo_data.do_irq = submitter_current_descriptor_q.flags[0]; - assign pending_descriptor_to_fifo_data.descriptor_addr = submitter_current_addr_q; - - always_comb begin : proc_submitter_burst_req - submitter_burst_req = '0; - - submitter_burst_req.length = submitter_current_descriptor_q.length; - submitter_burst_req.src_addr = submitter_current_descriptor_q.src_addr; - submitter_burst_req.dst_addr = submitter_current_descriptor_q.dest_addr; - - // Current backend only supports one ID - submitter_burst_req.opt.axi_id = submitter_current_descriptor_q.flags[23:16]; - submitter_burst_req.opt.src.burst = submitter_current_descriptor_q.flags[2:1]; - submitter_burst_req.opt.src.cache = submitter_current_descriptor_q.flags[11:8]; - // AXI4 does not support locked transactions, use atomics - submitter_burst_req.opt.src.lock = '0; - // unpriviledged, secure, data access - submitter_burst_req.opt.src.prot = '0; - // not participating in qos - submitter_burst_req.opt.src.qos = '0; - // only one region - submitter_burst_req.opt.src.region = '0; - submitter_burst_req.opt.dst.burst = submitter_current_descriptor_q.flags[4:3]; - submitter_burst_req.opt.dst.cache = submitter_current_descriptor_q.flags[15:12]; - // AXI4 does not support locked transactions, use atomics - submitter_burst_req.opt.dst.lock 
= '0; - // unpriviledged, secure, data access - submitter_burst_req.opt.dst.prot = '0; - // not participating in qos - submitter_burst_req.opt.dst.qos = '0; - // only one region in system - submitter_burst_req.opt.dst.region = '0; - // ensure coupled AW to avoid deadlocks - submitter_burst_req.opt.beo.decouple_aw = '0; - submitter_burst_req.opt.beo.decouple_rw = submitter_current_descriptor_q.flags[5]; - // this frontend currently only supports completely debursting - submitter_burst_req.opt.beo.src_max_llen = '0; - // this frontend currently only supports completely debursting - submitter_burst_req.opt.beo.dst_max_llen = '0; - submitter_burst_req.opt.beo.src_reduce_len = submitter_current_descriptor_q.flags[7]; - submitter_burst_req.opt.beo.dst_reduce_len = submitter_current_descriptor_q.flags[7]; - // serialization no longer supported - // submitter_burst_req.serialize = submitter_current_descriptor_q.flags[6]; - end - - always_comb begin : submitter_fsm - submitter_d = submitter_q; - submitter_current_addr_d = submitter_current_addr_q; - submitter_current_descriptor_d = submitter_current_descriptor_q; - submitter_burst_valid_d = submitter_burst_valid_q; - submitter_pending_fifo_valid_d = submitter_pending_fifo_valid_q; - submitter_fetch_counter_d = submitter_fetch_counter_q; - - unique case (submitter_q) - SubmitterIdle: begin - if (submitter_input_fifo_valid) begin - submitter_current_addr_d = desc_addr_from_input_fifo_data; - - submitter_d = SubmitterFetchDescriptor; - submitter_fetch_counter_d = '0; - end - end - SubmitterFetchDescriptor: begin - if (submitter_master_rsp.ready) begin - submitter_fetch_counter_d = submitter_fetch_counter_q + 1; - unique case (submitter_fetch_counter_q) - 2'b00: begin - submitter_current_descriptor_d.flags = submitter_master_rsp.rdata[63:32]; - submitter_current_descriptor_d.length = submitter_master_rsp.rdata[31:0]; - end - 2'b01: begin - submitter_current_descriptor_d.next = submitter_master_rsp.rdata; - end - 2'b10: begin - 
submitter_current_descriptor_d.src_addr = submitter_master_rsp.rdata; - end - 2'b11: begin - submitter_current_descriptor_d.dest_addr = submitter_master_rsp.rdata; - submitter_fetch_counter_d = '0; - submitter_d = SubmitterSendToBE; - submitter_burst_valid_d = 1'b1; - submitter_pending_fifo_valid_d = 1'b1; - end - default: begin - submitter_d = submitter_e'('X); - submitter_current_addr_d = 'X; - submitter_current_descriptor_d = 'X; - submitter_burst_valid_d = 'X; - submitter_pending_fifo_valid_d = 'X; - submitter_fetch_counter_d = 'X; - end - endcase - end - end - SubmitterSendToBE: begin - // Unset valid once the ready signal came. We can't use !ready, - // as we might be waiting on the other signal, while the - // first ready goes low again, marking our signal erroniously as valid. - if (pending_descriptor_to_fifo_ready) submitter_pending_fifo_valid_d = 1'b0; - if (dma_be_ready_i) submitter_burst_valid_d = 1'b0; - - if ((submitter_burst_valid_q == 1'b0 || dma_be_ready_i == 1'b1) && - (submitter_pending_fifo_valid_q == 1'b0 || pending_descriptor_to_fifo_ready == 1'b1)) begin - - submitter_current_descriptor_d = '0; - - if (submitter_current_descriptor_q.next == AddressSentinel) begin - submitter_d = SubmitterIdle; - end else begin - submitter_d = SubmitterFetchDescriptor; - submitter_current_addr_d = submitter_current_descriptor_q.next; - submitter_fetch_counter_d = '0; - end - end - end - default: begin - submitter_d = submitter_e'('X); - submitter_current_addr_d = 'X; - submitter_current_descriptor_d = 'X; - submitter_burst_valid_d = 'X; - submitter_pending_fifo_valid_d = 'X; - submitter_fetch_counter_d = 'X; - end - endcase - end : submitter_fsm - // }}} submitter FSM - - // {{{ feedback FSM - assign pending_descriptor_from_fifo_ready = feedback_pending_descriptor_ready_q; - assign completion_counter_decrement = feedback_counter_ready_q; - - always_comb begin : feedback_fsm - feedback_fsm_d = feedback_fsm_q; - feedback_addr_irq_d = feedback_addr_irq_q; - 
feedback_master_req_d = feedback_master_req_q; - feedback_irq_d = '0; - feedback_pending_descriptor_ready_d = '0; - feedback_counter_ready_d = '0; - - unique case (feedback_fsm_q) - FeedbackIdle: begin - feedback_pending_descriptor_ready_d = 1'b1; - if (pending_descriptor_from_fifo_valid) begin - feedback_addr_irq_d = pending_descriptor_from_fifo_data; - - feedback_fsm_d = FeedbackWaitingOnBackend; - end - end - FeedbackWaitingOnBackend: begin - if (completion_counter_has_items) begin - feedback_counter_ready_d = 1'b1; - feedback_fsm_d = FeedbackUpdateMemory; - end - end - FeedbackUpdateMemory: begin - if (feedback_master_req_q.valid == '0) begin - // overwrite the flags and length fields with all 1s - // to mark it as completed - feedback_master_req_d.addr = feedback_addr_irq_q.descriptor_addr; - feedback_master_req_d.write = 1'b1; - feedback_master_req_d.wdata = ~'0; - feedback_master_req_d.wstrb = ~'0; - feedback_master_req_d.valid = 1'b1; - end else if (feedback_master_rsp.ready == 1'b1) begin - feedback_master_req_d.write = '0; - feedback_master_req_d.valid = '0; - if (feedback_addr_irq_q.do_irq) begin - feedback_fsm_d = FeedbackRaiseIRQ; - end else begin - feedback_fsm_d = FeedbackIdle; - end - end - end - FeedbackRaiseIRQ: begin - feedback_irq_d = 1'b1; - feedback_fsm_d = FeedbackIdle; - end - default: begin - feedback_fsm_d = feedback_fsm_e'('X); - feedback_addr_irq_d = 'X; - feedback_master_req_d = 'X; - feedback_irq_d = 'X; - feedback_pending_descriptor_ready_d = 'X; - feedback_counter_ready_d = 'X; - end - endcase - end : feedback_fsm - // }}} feedback FSM - - // {{{ status update - assign register_file_to_reg.status.busy.d = (submitter_q != SubmitterIdle || - feedback_fsm_q != FeedbackIdle || - !dma_be_idle_i); - assign register_file_to_reg.status.busy.de = 1'b1; - - // leave a bit of wiggle room for the previous registers to catch up - assign register_file_to_reg.status.fifo_full.d = desc_addr_fifo_usage > 6; - assign 
register_file_to_reg.status.fifo_full.de = 1'b1; - // }}} status update - - // }}} combinatorial processes - - // {{{ instantiated modules - - // {{{ descriptor addr input to fifo - stream_fifo #( - .DATA_WIDTH (64) , - .DEPTH (InputFifoDepth) - ) i_descriptor_input_fifo ( - .clk_i, - .rst_ni, - .flush_i (1'b0) , - .testmode_i (1'b0) , - .usage_o (desc_addr_fifo_usage) , - // input port - .data_i (desc_addr_to_input_fifo_data) , - .valid_i (desc_addr_to_input_fifo_valid) , - .ready_o (desc_addr_to_input_fifo_ready) , - // output port - .data_o (desc_addr_from_input_fifo_data) , - .valid_o (desc_addr_from_input_fifo_valid), - .ready_i (desc_addr_from_input_fifo_ready) - ); - idma_desc64_reg_wrapper #( - .reg_req_t (reg_req_t), - .reg_rsp_t (reg_rsp_t) - ) i_register_file_controller ( - .clk_i (clk_i) , - .rst_ni (rst_ni) , - .reg_req_i (slave_req_i) , - .reg_rsp_o (slave_rsp_o) , - .reg2hw_o (register_file_to_hw) , - .hw2reg_i (register_file_to_reg) , - .devmode_i (1'b1) , - .descriptor_fifo_ready_i(desc_addr_to_input_fifo_ready), - .descriptor_fifo_valid_o(desc_addr_to_input_fifo_valid) - ); - // }}} descriptor addr input to fifo - - // {{{ pending descriptor FIFO - stream_fifo #( - .T (addr_irq_t) , - .DEPTH (PendingFifoDepth) - ) i_pending_descriptor_fifo ( - .clk_i, - .rst_ni, - .flush_i (1'b0) , - .testmode_i (1'b0) , - .usage_o (/* don't care for now */) , - .data_i (pending_descriptor_to_fifo_data) , - .valid_i (pending_descriptor_to_fifo_valid) , - .ready_o (pending_descriptor_to_fifo_ready) , - .data_o (pending_descriptor_from_fifo_data) , - .valid_o (pending_descriptor_from_fifo_valid), - .ready_i (pending_descriptor_from_fifo_ready) - ); - // }}} pending descriptor FIFO - - // {{{ counter module - idma_desc64_shared_counter #( - .CounterWidth(TxDoneBufferWidth) - ) i_completion_counter ( - .clk_i (clk_i) , - .rst_ni (rst_ni) , - .increment_i (dma_be_tx_complete_i) , - .decrement_i (completion_counter_decrement), - 
.greater_than_zero_o(completion_counter_has_items) - ); - // }}} counter module - - // {{{ regbus master arbitration - reg_mux #( - .NoPorts (2) , - .AW (AddrWidth), - .DW (AddrWidth), - .req_t (reg_req_t), - .rsp_t (reg_rsp_t) - ) i_master_arbitration ( - .clk_i (clk_i) , - .rst_ni (rst_ni) , - .in_req_i ({submitter_master_req, feedback_master_req_q}), - .in_rsp_o ({submitter_master_rsp, feedback_master_rsp}) , - .out_req_o(master_req_o) , - .out_rsp_i(master_rsp_i) - ); - // }}} regbus master arbitration - - // }}} instantiated modules - - // {{{ state-holding processes - - // {{{ submitter FSM - // state - `FF(submitter_q, submitter_d, SubmitterIdle); - `FF(submitter_fetch_counter_q, submitter_fetch_counter_d, '0); - - // data - `FF(submitter_current_addr_q, submitter_current_addr_d, '0); - `FF(submitter_current_descriptor_q, submitter_current_descriptor_d, '{default: '0}); - - // ready-valid signals - `FF(submitter_burst_valid_q, submitter_burst_valid_d, '0); - `FF(submitter_pending_fifo_valid_q, submitter_pending_fifo_valid_d, '0); - // }}} submitter FSM - - // {{{ feedback FSM - `FF(feedback_fsm_q, feedback_fsm_d, FeedbackIdle); - - // data - `FF(feedback_addr_irq_q, feedback_addr_irq_d, '0); - `FF(feedback_irq_q, feedback_irq_d, '0); - - // register_interface master request - `FF(feedback_master_req_q, feedback_master_req_d, '{default: '0}); - - // ready-valid signals - `FF(feedback_pending_descriptor_ready_q, feedback_pending_descriptor_ready_d, '0); - `FF(feedback_counter_ready_q, feedback_counter_ready_d, '0); - // }}} feedback FSM - - // }}} state-holding processes - - // {{{ output assignments - assign dma_be_req_o = submitter_burst_req; - assign dma_be_valid_o = submitter_burst_valid_q; - assign irq_o = feedback_irq_q; - // }}} output assignments - -endmodule diff --git a/src/frontend/desc64/idma_desc64_ar_gen.sv b/src/frontend/desc64/idma_desc64_ar_gen.sv new file mode 100644 index 00000000..ef97f2d3 --- /dev/null +++ 
b/src/frontend/desc64/idma_desc64_ar_gen.sv @@ -0,0 +1,133 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Authors: +// - Axel Vanoni + +`include "common_cells/assertions.svh" +`include "common_cells/registers.svh" + +/// This module generates AR packets to fetch descriptors from memory +module idma_desc64_ar_gen #( + /// AXI Data width + parameter int unsigned DataWidth = 64, + /// Descriptor type. `$bits(descriptor_t)` must be a power of two + parameter type descriptor_t = logic, + /// AXI AR channel type + parameter type axi_ar_chan_t = logic, + /// AXI AR id type + parameter type axi_id_t = logic, + /// Type that can hold the usage information of the idma_req fifo + parameter type usage_t = logic, + /// AXI Address type + parameter type addr_t = logic +)( + /// Clock + input logic clk_i, + /// Reset + input logic rst_ni, + /// AXI AR channel + output axi_ar_chan_t axi_ar_chan_o, + /// AXI AR valid + output logic axi_ar_chan_valid_o, + /// AXI AR ready + input logic axi_ar_chan_ready_i, + /// AXI ID to use when requesting + input axi_id_t axi_ar_id_i, + /// queued address to use when we reach the last in a chain + input addr_t queued_address_i, + /// queued address valid + input logic queued_address_valid_i, + /// queued address ready + output logic queued_address_ready_o, + /// next address as read from descriptor + input addr_t next_address_from_descriptor_i, + /// next address valid + input logic next_address_from_descriptor_valid_i, + /// number of available slots in the idma request fifo + input usage_t idma_req_available_slots_i, + /// address for feedback for the next request + output addr_t feedback_addr_o, + /// feedback address valid + output logic feedback_addr_valid_o, + /// whether the unit is busy + output logic busy_o +); + +`define MIN(a, b) ((a) < (b) ? 
a : b) + +localparam int unsigned DataWidthBytes = DataWidth / 8; +localparam int unsigned DescriptorSize = $bits(descriptor_t) / 8; + +localparam logic [2:0] AxiSize = `MIN(`MIN($clog2(DataWidthBytes), + $clog2(DescriptorSize)), 3'b111); +localparam logic [7:0] AxiLength = DescriptorSize / DataWidthBytes - 1; + +logic inflight_q, inflight_d; +logic next_addr_from_desc_valid_q, next_addr_from_desc_valid_d; +logic next_addr_from_desc_valid_this_cycle; +logic take_from_queued; +logic may_send_ar; +addr_t next_addr_q, next_addr_d; +addr_t ar_addr; + +assign next_addr_from_desc_valid_d = next_address_from_descriptor_valid_i; +assign next_addr_from_desc_valid_this_cycle = !next_addr_from_desc_valid_q && + next_address_from_descriptor_valid_i; + +assign next_addr_d = next_addr_from_desc_valid_this_cycle ? + next_address_from_descriptor_i : + next_addr_q; + +assign take_from_queued = (next_addr_from_desc_valid_this_cycle ? + next_address_from_descriptor_i == '1 : + next_addr_q == '1); + +assign ar_addr = take_from_queued ? queued_address_i : + (next_addr_from_desc_valid_this_cycle ? 
+ next_address_from_descriptor_i : next_addr_q); + +assign may_send_ar = idma_req_available_slots_i > 0 && + (!inflight_q || next_addr_from_desc_valid_this_cycle); + +always_comb begin : proc_inflight + inflight_d = inflight_q; + if (axi_ar_chan_ready_i && axi_ar_chan_valid_o) begin + inflight_d = 1'b1; + end else if (next_addr_from_desc_valid_this_cycle) begin + inflight_d = 1'b0; + end +end + +always_comb begin : proc_ready_valid + axi_ar_chan_valid_o = 1'b0; + queued_address_ready_o = 1'b0; + if (may_send_ar) begin + if (take_from_queued) begin + axi_ar_chan_valid_o = queued_address_valid_i; + queued_address_ready_o = axi_ar_chan_ready_i; + end else begin + axi_ar_chan_valid_o = 1'b1; + end + end +end + +always_comb begin : proc_ar + axi_ar_chan_o = '0; + axi_ar_chan_o.id = axi_ar_id_i; + axi_ar_chan_o.addr = ar_addr; + axi_ar_chan_o.len = AxiLength; + axi_ar_chan_o.size = AxiSize; + axi_ar_chan_o.burst = axi_pkg::BURST_INCR; +end + +`FF(inflight_q, inflight_d, 1'b0); +`FF(next_addr_from_desc_valid_q, next_addr_from_desc_valid_d, 1'b0); +`FF(next_addr_q, next_addr_d, '1); + +assign feedback_addr_o = ar_addr; +assign feedback_addr_valid_o = axi_ar_chan_ready_i && axi_ar_chan_valid_o; +assign busy_o = !take_from_queued || inflight_q; + +endmodule diff --git a/src/frontend/desc64/idma_desc64_ar_gen_prefetch.sv b/src/frontend/desc64/idma_desc64_ar_gen_prefetch.sv new file mode 100644 index 00000000..b4ba4a70 --- /dev/null +++ b/src/frontend/desc64/idma_desc64_ar_gen_prefetch.sv @@ -0,0 +1,308 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 + +// Authors: +// - Axel Vanoni + +`include "common_cells/assertions.svh" +`include "common_cells/registers.svh" + +/// This module generates AR packets to fetch descriptors from memory +module idma_desc64_ar_gen_prefetch #( + /// AXI Data width + parameter int unsigned DataWidth = 64, + /// How many descriptors may be prefetched + parameter int unsigned NSpeculation = 0, + /// Descriptor type. `$bits(descriptor_t)` must be a power of two + parameter type descriptor_t = logic, + /// AXI AR channel type + parameter type axi_ar_chan_t = logic, + /// AXI AR id type + parameter type axi_id_t = logic, + /// Type that can hold the usage information of the idma_req fifo + parameter type usage_t = logic, + /// AXI Address type + parameter type addr_t = logic, + /// Type that can hold how many descriptors to flush on the R channel. + /// Do not override. + parameter type flush_t = logic [$clog2(NSpeculation + 1)-1:0] +)( + /// Clock + input logic clk_i, + /// Reset + input logic rst_ni, + /// AXI AR channel + output axi_ar_chan_t axi_ar_chan_o, + /// AXI AR valid + output logic axi_ar_chan_valid_o, + /// AXI AR ready + input logic axi_ar_chan_ready_i, + /// AXI ID to use when requesting + input axi_id_t axi_ar_id_i, + /// queued address to use when we reach the last in a chain + input addr_t queued_address_i, + /// queued address valid + input logic queued_address_valid_i, + /// queued address ready + output logic queued_address_ready_o, + /// next address as read from descriptor + input addr_t next_address_from_descriptor_i, + /// next address valid + input logic next_address_from_descriptor_valid_i, + /// number of available slots in the idma request fifo + input usage_t idma_req_available_slots_i, + /// number of requests to flush on the R channel + output flush_t n_requests_to_flush_o, + /// if asserted, flush `n_requests_to_flush_o` on the R channel + output logic n_requests_to_flush_valid_o, + /// address for feedback for the 
next request + output addr_t feedback_addr_o, + /// feedback address valid + output logic feedback_addr_valid_o, + /// whether the unit is busy + output logic busy_o +); + +`define MIN(a, b) ((a) < (b) ? a : b) + +localparam int unsigned DataWidthBytes = DataWidth / 8; +localparam int unsigned DescriptorSize = $bits(descriptor_t) / 8; + +// We need the descriptor to have a power of two size for easy multiplication +// when calculating the next address +// pragma translate_off +`ASSERT_INIT(DescriptorSizeIsPowerOfTwo, (32'd1 << $clog2(DescriptorSize)) == DescriptorSize) +// pragma translate_on + +localparam logic [2:0] AxiSize = `MIN(`MIN($clog2(DataWidthBytes), + $clog2(DescriptorSize)), 3'b111); +localparam logic [7:0] AxiLength = DescriptorSize / DataWidthBytes - 1; + +localparam int unsigned SpeculationWidth = $clog2(NSpeculation + 1); +localparam int unsigned SpeculationUsageWidth = $clog2(NSpeculation); + +typedef struct packed { + logic speculative; + addr_t addr; +} addr_spec_t; + +addr_t base_addr_q, base_addr_d; +logic base_valid_q, base_valid_d; +logic take_from_next; + +logic unblocked; +logic next_addr_valid_q, next_addr_valid_d; +logic next_addr_valid_this_cycle; + +addr_spec_t next_ar; +logic next_ar_valid, next_ar_ready; + +addr_spec_t staging_addr; +logic staging_addr_valid_pending, staging_addr_ready_pending; +addr_t staging_addr_legalization; +logic staging_addr_valid_legalization, staging_addr_ready_legalization; +logic staging_addr_valid_speculation, staging_addr_ready_speculation; + +addr_t addr_out; + + +logic [SpeculationWidth:0] inflight_counter_q, inflight_counter_d; +logic flush; +logic flush_d, flush_q; +logic commit; +logic speculation_correct; +logic legalization_usage; +logic idma_enough_slots; +addr_t speculation_addr; +addr_t speculation_check_addr; +logic speculation_ready, speculation_valid; +logic [SpeculationUsageWidth-1:0] speculation_usage_short; +logic [SpeculationWidth-1:0] speculation_usage; + +assign take_from_next = 
base_valid_q && (next_address_from_descriptor_i != '1); + +assign unblocked = (NSpeculation > inflight_counter_q) && + (idma_req_available_slots_i > inflight_counter_q); + +assign next_ar_valid = unblocked && base_valid_q; +assign next_ar.speculative = inflight_counter_q > 0; +assign next_ar.addr = base_addr_q + (inflight_counter_q << $clog2(DescriptorSize)); + +assign staging_addr_valid_legalization = flush ? idma_req_available_slots_i > '0 && + (next_address_from_descriptor_i == '1 ? + queued_address_valid_i : 1'b1) : + staging_addr_valid_pending && + ((staging_addr_ready_speculation && !flush_q) || + !staging_addr.speculative); +assign staging_addr_ready_pending = staging_addr_ready_legalization && + ((staging_addr_ready_speculation && !flush_q) || + !staging_addr.speculative) && + !flush; +assign staging_addr_valid_speculation = staging_addr_valid_pending && + staging_addr_ready_legalization && + staging_addr.speculative && + !flush && !flush_q; + +assign next_addr_valid_d = next_address_from_descriptor_valid_i; +assign next_addr_valid_this_cycle = next_address_from_descriptor_valid_i && !next_addr_valid_q; + +assign staging_addr_legalization = flush ? ( + next_address_from_descriptor_i == '1 ? queued_address_i : + next_address_from_descriptor_i ) : staging_addr.addr; + +assign speculation_check_addr = speculation_valid ? speculation_addr : next_ar.addr; + +assign speculation_correct = next_address_from_descriptor_i == '1 ? 
+ (queued_address_valid_i && speculation_check_addr == queued_address_i) : + speculation_check_addr == next_address_from_descriptor_i; + +assign flush = next_addr_valid_this_cycle && !speculation_correct; +assign commit = next_addr_valid_this_cycle && speculation_correct; + +assign speculation_ready = commit; + +assign idma_enough_slots = idma_req_available_slots_i > inflight_counter_q && + inflight_counter_q < NSpeculation; + +// handle case of NSpeculation being power of 2 +always_comb begin : proc_usage + speculation_usage = speculation_usage_short; + // we can't distinguish between max and empty if readys and valids are on + // at the same time! + if (speculation_usage_short == '0 && speculation_valid) begin + speculation_usage = NSpeculation; + end +end + +always_comb begin : proc_base_valid + base_valid_d = base_valid_q; + if (queued_address_valid_i) begin + base_valid_d = 1'b1; + end else if (!queued_address_valid_i && + next_addr_valid_this_cycle && next_address_from_descriptor_i == '1) begin + base_valid_d = 1'b0; + end +end + +always_comb begin : proc_base_addr + base_addr_d = base_addr_q; + if (take_from_next && next_addr_valid_this_cycle) begin + if (next_addr_valid_this_cycle) begin + base_addr_d = next_address_from_descriptor_i; + end + end else if ((!take_from_next && next_addr_valid_this_cycle) || + !base_valid_q) begin + if (queued_address_valid_i) begin + base_addr_d = queued_address_i; + end + end +end + +always_comb begin : proc_inflight_counter + inflight_counter_d = inflight_counter_q; + if (flush) begin + inflight_counter_d = (staging_addr_valid_legalization && staging_addr_ready_legalization); + end else begin + inflight_counter_d = inflight_counter_q + (next_ar_valid && next_ar_ready) - commit; + end +end + +always_comb begin : proc_feedback_addr + // Normally, the next feedback address is the one we're commiting. 
+ feedback_addr_o = speculation_addr; + feedback_addr_valid_o = commit && speculation_valid; + // After a flush or when starting fresh however, we have a first address + // that is known and doesn't pass through the speculation buffer. We need + // to pass that address through in that case. + if (!flush) begin + if (!staging_addr.speculative && + staging_addr_valid_legalization && + staging_addr_ready_legalization) begin + + feedback_addr_o = staging_addr.addr; + feedback_addr_valid_o = 1'b1; + end + end else begin + feedback_addr_o = staging_addr_legalization; + feedback_addr_valid_o = staging_addr_valid_legalization && staging_addr_ready_legalization; + end +end + +assign queued_address_ready_o = !take_from_next && (!base_valid_q || next_addr_valid_this_cycle); + +`FF(inflight_counter_q, inflight_counter_d, '0); +`FF(base_addr_q, base_addr_d, '0); +`FF(next_addr_valid_q, next_addr_valid_d, 1'b0); +`FF(base_valid_q, base_valid_d, 1'b0); +`FF(flush_q, flush_d, 1'b0); +assign flush_d = flush; + +stream_fifo #( + .FALL_THROUGH(1'b1), + .DEPTH (NSpeculation), + .T (addr_t) +) i_speculation_fifo ( + .clk_i, + .rst_ni, + .flush_i (flush_q), + .testmode_i(1'b0), + .usage_o (speculation_usage_short), + .data_i (staging_addr.addr), + .valid_i (staging_addr_valid_speculation), + .ready_o (staging_addr_ready_speculation), + .data_o (speculation_addr), + .valid_o (speculation_valid), + .ready_i (speculation_ready) +); + +stream_fifo #( + .FALL_THROUGH(1'b1), + .DEPTH (NSpeculation), + .T (addr_spec_t) +) i_pending_ars ( + .clk_i, + .rst_ni, + .flush_i (flush), + .testmode_i(1'b0), + .usage_o ( /* unconnected */ ), + .data_i (next_ar), + .valid_i (next_ar_valid), + .ready_o (next_ar_ready), + .data_o (staging_addr), + .valid_o (staging_addr_valid_pending), + .ready_i (staging_addr_ready_pending) +); + +stream_fifo #( + .FALL_THROUGH(1'b1), + .DEPTH (1), + .T (addr_t) +) i_legalization_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o 
(legalization_usage), + .data_i (staging_addr_legalization), + .valid_i (staging_addr_valid_legalization), + .ready_o (staging_addr_ready_legalization), + .data_o (addr_out), + .valid_o (axi_ar_chan_valid_o), + .ready_i (axi_ar_chan_ready_i) +); + +assign n_requests_to_flush_o = speculation_usage; +assign n_requests_to_flush_valid_o = flush; +assign busy_o = base_valid_q || inflight_counter_q > '0; + +always_comb begin : proc_ar + axi_ar_chan_o = '0; + axi_ar_chan_o.id = axi_ar_id_i; + axi_ar_chan_o.addr = addr_out; + axi_ar_chan_o.len = AxiLength; + axi_ar_chan_o.size = AxiSize; + axi_ar_chan_o.burst = axi_pkg::BURST_INCR; +end + +endmodule diff --git a/src/frontend/desc64/idma_desc64_cva6_synth.sv b/src/frontend/desc64/idma_desc64_cva6_synth.sv new file mode 100644 index 00000000..397011f2 --- /dev/null +++ b/src/frontend/desc64/idma_desc64_cva6_synth.sv @@ -0,0 +1,67 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 + +// Authors: +// - Axel Vanoni + +/// Synthesis wrapper for the descriptor-based frontend +module idma_desc64_cva6_synth #( + parameter int AxiAddrWidth = idma_desc64_cva6_synth_pkg::AxiAddrWidth, + parameter int AxiDataWidth = idma_desc64_cva6_synth_pkg::AxiDataWidth, + parameter int AxiUserWidth = idma_desc64_cva6_synth_pkg::AxiUserWidth, + parameter int AxiIdWidth = idma_desc64_cva6_synth_pkg::AxiIdWidth, + parameter int AxiSlvIdWidth = idma_desc64_cva6_synth_pkg::AxiSlvIdWidth, + parameter int NSpeculation = idma_desc64_cva6_synth_pkg::NSpeculation, + parameter int PendingFifoDepth = idma_desc64_cva6_synth_pkg::PendingFifoDepth, + parameter int InputFifoDepth = idma_desc64_cva6_synth_pkg::InputFifoDepth, + parameter type mst_aw_chan_t = idma_desc64_cva6_synth_pkg::mst_aw_chan_t, + parameter type mst_w_chan_t = idma_desc64_cva6_synth_pkg::mst_w_chan_t, + parameter type mst_b_chan_t = idma_desc64_cva6_synth_pkg::mst_b_chan_t, + parameter type mst_ar_chan_t = idma_desc64_cva6_synth_pkg::mst_ar_chan_t, + parameter type mst_r_chan_t = idma_desc64_cva6_synth_pkg::mst_r_chan_t, + parameter type axi_mst_req_t = idma_desc64_cva6_synth_pkg::axi_mst_req_t, + parameter type axi_mst_rsp_t = idma_desc64_cva6_synth_pkg::axi_mst_rsp_t, + parameter type axi_slv_req_t = idma_desc64_cva6_synth_pkg::axi_slv_req_t, + parameter type axi_slv_rsp_t = idma_desc64_cva6_synth_pkg::axi_slv_rsp_t +)( + input logic clk_i, + input logic rst_ni, + input logic testmode_i, + output logic irq_o, + output axi_mst_req_t axi_master_req_o, + input axi_mst_rsp_t axi_master_rsp_i, + input axi_slv_req_t axi_slave_req_i, + output axi_slv_rsp_t axi_slave_rsp_o +); + +idma_desc64_cva6_wrap #( + .AxiAddrWidth (AxiAddrWidth ), + .AxiDataWidth (AxiDataWidth ), + .AxiUserWidth (AxiUserWidth ), + .AxiIdWidth (AxiIdWidth ), + .AxiSlvIdWidth (AxiSlvIdWidth), + .NSpeculation (NSpeculation), + .PendingFifoDepth(PendingFifoDepth), + .InputFifoDepth(InputFifoDepth), + 
.mst_aw_chan_t (mst_aw_chan_t), + .mst_w_chan_t (mst_w_chan_t ), + .mst_b_chan_t (mst_b_chan_t ), + .mst_ar_chan_t (mst_ar_chan_t), + .mst_r_chan_t (mst_r_chan_t ), + .axi_mst_req_t (axi_mst_req_t), + .axi_mst_rsp_t (axi_mst_rsp_t), + .axi_slv_req_t (axi_slv_req_t), + .axi_slv_rsp_t (axi_slv_rsp_t) +) i_idma_desc64_cva6_wrap ( + .clk_i, + .rst_ni, + .testmode_i, + .irq_o, + .axi_master_req_o, + .axi_master_rsp_i, + .axi_slave_req_i, + .axi_slave_rsp_o +); + +endmodule diff --git a/src/frontend/desc64/idma_desc64_cva6_synth_pkg.sv b/src/frontend/desc64/idma_desc64_cva6_synth_pkg.sv new file mode 100644 index 00000000..e889c0c9 --- /dev/null +++ b/src/frontend/desc64/idma_desc64_cva6_synth_pkg.sv @@ -0,0 +1,30 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Authors: +// - Axel Vanoni + +`include "axi/typedef.svh" + +/// Synthesis package for the descriptor-based frontend +package idma_desc64_cva6_synth_pkg; + `AXI_TYPEDEF_ALL(axi, logic [63:0], logic [2:0], logic [63:0], logic [7:0], logic) + parameter int AxiAddrWidth = 64; + parameter int AxiDataWidth = 64; + parameter int AxiUserWidth = 1; + parameter int AxiIdWidth = 3; + parameter int AxiSlvIdWidth = 3; + parameter int NSpeculation = 4; + parameter int PendingFifoDepth = 4; + parameter int InputFifoDepth = 1; + parameter type mst_aw_chan_t = axi_aw_chan_t; // AW Channel Type, master port + parameter type mst_w_chan_t = axi_w_chan_t; // W Channel Type, all ports + parameter type mst_b_chan_t = axi_b_chan_t; // B Channel Type, master port + parameter type mst_ar_chan_t = axi_ar_chan_t; // AR Channel Type, master port + parameter type mst_r_chan_t = axi_r_chan_t; // R Channel Type, master port + parameter type axi_mst_req_t = axi_req_t; + parameter type axi_mst_rsp_t = axi_resp_t; + parameter type axi_slv_req_t = axi_req_t; + parameter type axi_slv_rsp_t = axi_resp_t; +endpackage diff --git 
a/src/frontend/desc64/idma_desc64_cva6_wrap.sv b/src/frontend/desc64/idma_desc64_cva6_wrap.sv new file mode 100644 index 00000000..7d808a56 --- /dev/null +++ b/src/frontend/desc64/idma_desc64_cva6_wrap.sv @@ -0,0 +1,334 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Authors: +// - Axel Vanoni + +`include "axi/assign.svh" +`include "axi/typedef.svh" +`include "idma/guard.svh" +`include "idma/typedef.svh" +`include "idma/tracer.svh" +`include "register_interface/typedef.svh" +`include "common_cells/registers.svh" + +/// Wrapper for the iDMA +module idma_desc64_cva6_wrap #( + parameter int AxiAddrWidth = 64, + parameter int AxiDataWidth = 64, + parameter int AxiUserWidth = -1, + parameter int AxiIdWidth = -1, + parameter int AxiSlvIdWidth = -1, + parameter int NSpeculation = 4, + parameter int PendingFifoDepth = 4, + parameter int InputFifoDepth = 1, + parameter type mst_aw_chan_t = logic, // AW Channel Type, master port + parameter type mst_w_chan_t = logic, // W Channel Type, all ports + parameter type mst_b_chan_t = logic, // B Channel Type, master port + parameter type mst_ar_chan_t = logic, // AR Channel Type, master port + parameter type mst_r_chan_t = logic, // R Channel Type, master port + parameter type axi_mst_req_t = logic, + parameter type axi_mst_rsp_t = logic, + parameter type axi_slv_req_t = logic, + parameter type axi_slv_rsp_t = logic +) ( + input logic clk_i, + input logic rst_ni, + input logic testmode_i, + output logic irq_o, + output axi_mst_req_t axi_master_req_o, + input axi_mst_rsp_t axi_master_rsp_i, + input axi_slv_req_t axi_slave_req_i, + output axi_slv_rsp_t axi_slave_rsp_o +); + import axi_pkg::*; + + typedef logic [AxiAddrWidth-1:0] addr_t; + typedef logic [AxiDataWidth-1:0] data_t; + typedef logic [(AxiDataWidth/8)-1:0] strb_t; + typedef logic [AxiUserWidth-1:0] user_t; + // has one less bit for the mux not to 
error + typedef logic [AxiIdWidth-2:0] post_mux_id_t; + + localparam int unsigned NumAxInFlight = NSpeculation < 3 ? 3 : NSpeculation; + localparam int unsigned BufferDepth = 3; + + axi_slv_req_t axi_slv_req; + axi_slv_rsp_t axi_slv_rsp; + + `AXI_TYPEDEF_ALL(dma_axi_mst_post_mux, addr_t, post_mux_id_t, data_t, strb_t, user_t) + dma_axi_mst_post_mux_req_t axi_fe_mst_req; + dma_axi_mst_post_mux_resp_t axi_fe_mst_rsp; + dma_axi_mst_post_mux_req_t axi_read_req, axi_write_req, axi_be_mst_req; + dma_axi_mst_post_mux_resp_t axi_read_rsp, axi_write_rsp, axi_be_mst_rsp; + + `REG_BUS_TYPEDEF_ALL(dma_reg, addr_t, data_t, strb_t) + dma_reg_req_t dma_reg_slv_req; + dma_reg_rsp_t dma_reg_slv_rsp; + + // iDMA struct definitions + localparam int unsigned TFLenWidth = 32; + typedef logic [TFLenWidth-1:0] tf_len_t; + + // iDMA request / response types + `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, post_mux_id_t, addr_t, tf_len_t) + `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) + + typedef struct packed { + dma_axi_mst_post_mux_ar_chan_t ar_chan; + } axi_read_meta_channel_t; + + typedef struct packed { + axi_read_meta_channel_t axi; + } read_meta_channel_t; + + typedef struct packed { + dma_axi_mst_post_mux_aw_chan_t aw_chan; + } axi_write_meta_channel_t; + + typedef struct packed { + axi_write_meta_channel_t axi; + } write_meta_channel_t; + + idma_req_t idma_req; + logic idma_req_valid; + logic idma_req_ready; + + idma_rsp_t idma_rsp; + logic idma_rsp_valid; + logic idma_rsp_ready; + idma_pkg::idma_busy_t idma_busy; + + idma_desc64_top #( + .AddrWidth ( AxiAddrWidth ), + .DataWidth ( AxiDataWidth ), + .AxiIdWidth ( AxiIdWidth - 1 ), + .idma_req_t ( idma_req_t ), + .idma_rsp_t ( idma_rsp_t ), + .axi_req_t ( dma_axi_mst_post_mux_req_t ), + .axi_rsp_t ( dma_axi_mst_post_mux_resp_t ), + .axi_ar_chan_t ( dma_axi_mst_post_mux_ar_chan_t ), + .axi_r_chan_t ( dma_axi_mst_post_mux_r_chan_t ), + .reg_req_t ( dma_reg_req_t ), + .reg_rsp_t ( dma_reg_rsp_t ), + .InputFifoDepth ( InputFifoDepth ), + 
.PendingFifoDepth ( PendingFifoDepth ), + .BackendDepth ( NumAxInFlight + BufferDepth ), + .NSpeculation ( NSpeculation ) + ) i_dma_desc64 ( + .clk_i, + .rst_ni, + .master_req_o ( axi_fe_mst_req ), + .master_rsp_i ( axi_fe_mst_rsp ), + .axi_ar_id_i ( '1 ), + .axi_aw_id_i ( '1 ), + .slave_req_i ( dma_reg_slv_req ), + .slave_rsp_o ( dma_reg_slv_rsp ), + .idma_req_o ( idma_req ), + .idma_req_valid_o ( idma_req_valid ), + .idma_req_ready_i ( idma_req_ready ), + .idma_rsp_i ( idma_rsp ), + .idma_rsp_valid_i ( idma_rsp_valid ), + .idma_rsp_ready_o ( idma_rsp_ready ), + .idma_busy_i ( |idma_busy ), + .irq_o ( irq_o ) + ); + + idma_backend_rw_axi #( + .DataWidth ( AxiDataWidth ), + .AddrWidth ( AxiAddrWidth ), + .AxiIdWidth ( AxiIdWidth-1 ), + .UserWidth ( AxiUserWidth ), + .TFLenWidth ( TFLenWidth ), + .MaskInvalidData ( 1'b1 ), + .BufferDepth ( BufferDepth ), + .RAWCouplingAvail ( 1'b1 ), + .HardwareLegalizer ( 1'b1 ), + .RejectZeroTransfers ( 1'b1 ), + .ErrorCap ( idma_pkg::NO_ERROR_HANDLING ), + .CombinedShifter ( 1'b0 ), + .PrintFifoInfo ( 1'b0 ), + .NumAxInFlight ( NumAxInFlight ), + .MemSysDepth ( 32'd0 ), + .idma_req_t ( idma_req_t ), + .idma_rsp_t ( idma_rsp_t ), + .idma_eh_req_t ( idma_pkg::idma_eh_req_t ), + .idma_busy_t ( idma_pkg::idma_busy_t ), + .axi_req_t ( dma_axi_mst_post_mux_req_t ), + .axi_rsp_t ( dma_axi_mst_post_mux_resp_t ), + .write_meta_channel_t ( write_meta_channel_t ), + .read_meta_channel_t ( read_meta_channel_t ) + ) i_idma_backend ( + .clk_i, + .rst_ni, + .testmode_i, + .idma_req_i ( idma_req ), + .req_valid_i ( idma_req_valid ), + .req_ready_o ( idma_req_ready ), + .idma_rsp_o ( idma_rsp ), + .rsp_valid_o ( idma_rsp_valid ), + .rsp_ready_i ( idma_rsp_ready ), + .idma_eh_req_i ( '0 ), + .eh_req_valid_i ( 1'b1 ), + .eh_req_ready_o ( /*NOT CONNECTED*/ ), + .axi_read_req_o ( axi_read_req ), + .axi_read_rsp_i ( axi_read_rsp ), + .axi_write_req_o ( axi_write_req ), + .axi_write_rsp_i ( axi_write_rsp ), + .busy_o ( idma_busy ) + ); + + // Read 
Write Join + axi_rw_join #( + .axi_req_t ( dma_axi_mst_post_mux_req_t ), + .axi_resp_t ( dma_axi_mst_post_mux_resp_t ) + ) i_axi_rw_join ( + .clk_i, + .rst_ni, + .slv_read_req_i ( axi_read_req ), + .slv_read_resp_o ( axi_read_rsp ), + .slv_write_req_i ( axi_write_req ), + .slv_write_resp_o ( axi_write_rsp ), + .mst_req_o ( axi_be_mst_req ), + .mst_resp_i ( axi_be_mst_rsp ) + ); + + `IDMA_NONSYNTH_BLOCK( + string trace_file; + initial begin + void'($value$plusargs("trace_file=%s", trace_file)); + end + initial begin : inital_tracer + automatic bit first_iter = 1; + automatic integer tf; + automatic `IDMA_TRACER_MAX_TYPE cnst [string]; + automatic `IDMA_TRACER_MAX_TYPE meta [string]; + automatic `IDMA_TRACER_MAX_TYPE busy [string]; + automatic `IDMA_TRACER_MAX_TYPE axib [string]; + automatic string trace; + #0; + tf = $fopen(trace_file, "w"); + $display("[Tracer] Logging iDMA backend %s to %s", "i_idma_backend", trace_file); + forever begin + @(posedge i_idma_backend.clk_i); + if (i_idma_backend.rst_ni & |i_idma_backend.busy_o) begin + break; + end + end + forever begin + @(posedge i_idma_backend.clk_i); + /* Trace */ + trace = "{"; + /* Constants */ + cnst = '{ + "inst" : "i_idma_backend", + "data_width" : i_idma_backend.DataWidth, + "addr_width" : i_idma_backend.AddrWidth, + "user_width" : i_idma_backend.UserWidth, + "axi_id_width" : i_idma_backend.AxiIdWidth, + "num_ax_in_flight" : i_idma_backend.NumAxInFlight, + "buffer_depth" : i_idma_backend.BufferDepth, + "tf_len_width" : i_idma_backend.TFLenWidth, + "mem_sys_depth" : i_idma_backend.MemSysDepth, + "rw_coupling_avail" : i_idma_backend.RAWCouplingAvail, + "mask_invalid_data" : i_idma_backend.MaskInvalidData, + "hardware_legalizer" : i_idma_backend.HardwareLegalizer, + "reject_zero_transfers" : i_idma_backend.RejectZeroTransfers, + "error_cap" : i_idma_backend.ErrorCap, + "print_fifo_info" : i_idma_backend.PrintFifoInfo + }; + meta = '{ + "time" : $time() + }; + busy = '{ + "buffer" : 
i_idma_backend.busy_o.buffer_busy, + "r_dp" : i_idma_backend.busy_o.r_dp_busy, + "w_dp" : i_idma_backend.busy_o.w_dp_busy, + "r_leg" : i_idma_backend.busy_o.r_leg_busy, + "w_leg" : i_idma_backend.busy_o.w_leg_busy, + "eh_fsm" : i_idma_backend.busy_o.eh_fsm_busy, + "eh_cnt" : i_idma_backend.busy_o.eh_cnt_busy, + "raw_coupler" : i_idma_backend.busy_o.raw_coupler_busy + }; + axib = '{ + "w_valid" : i_idma_backend.protocol_req_o.w_valid, + "w_ready" : axi_be_mst_rsp.w_ready, + "w_strb" : i_idma_backend.protocol_req_o.w.strb, + "r_valid" : axi_be_mst_rsp.r_valid, + "r_ready" : i_idma_backend.protocol_req_o.r_ready + }; + if ($isunknown(axib["w_ready"]) || $isunknown(axib["r_valid"])) begin + $fatal("UNKNOWN AXI STATE, THIS SHOULD NEVER HAPPEN!"); + end + /* Assembly */ + `IDMA_TRACER_STR_ASSEMBLY(cnst, first_iter); + `IDMA_TRACER_STR_ASSEMBLY(meta, 1); + `IDMA_TRACER_STR_ASSEMBLY(busy, 1); + `IDMA_TRACER_STR_ASSEMBLY(axib, 1); + `IDMA_TRACER_CLEAR_COND(first_iter); + /* Commit */ + $fwrite(tf, $sformatf("%s}\n", trace)); + end + end + ) + + axi_mux #( + .SlvAxiIDWidth(AxiIdWidth - 1), + .slv_aw_chan_t(dma_axi_mst_post_mux_aw_chan_t), + .mst_aw_chan_t(mst_aw_chan_t), + .w_chan_t (mst_w_chan_t), // same channel type for master+slave + .slv_b_chan_t (dma_axi_mst_post_mux_b_chan_t), + .mst_b_chan_t (mst_b_chan_t), + .slv_ar_chan_t(dma_axi_mst_post_mux_ar_chan_t), + .mst_ar_chan_t(mst_ar_chan_t), + .slv_r_chan_t (dma_axi_mst_post_mux_r_chan_t), + .mst_r_chan_t (mst_r_chan_t), + .slv_req_t (dma_axi_mst_post_mux_req_t), + .slv_resp_t (dma_axi_mst_post_mux_resp_t), + .mst_req_t (axi_mst_req_t), + .mst_resp_t (axi_mst_rsp_t), + .NoSlvPorts ('d2), + .MaxWTrans ('d2), + .FallThrough ('0), + .SpillAw ('b0), + .SpillW ('0), + .SpillB ('0), + .SpillAr ('b0), + .SpillR ('0) + ) i_axi_mux ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .test_i (1'b0), + .slv_reqs_i ({axi_fe_mst_req, axi_be_mst_req}), + .slv_resps_o ({axi_fe_mst_rsp, axi_be_mst_rsp}), + .mst_req_o (axi_master_req_o), + 
.mst_resp_i (axi_master_rsp_i) + ); + + axi_to_reg #( + .ADDR_WIDTH (AxiAddrWidth), + .DATA_WIDTH (AxiDataWidth), + .ID_WIDTH (AxiSlvIdWidth), + .USER_WIDTH (AxiUserWidth), + .AXI_MAX_WRITE_TXNS(32'd1), + .AXI_MAX_READ_TXNS (32'd1), + .DECOUPLE_W (1'b1), + .axi_req_t (axi_slv_req_t), + .axi_rsp_t (axi_slv_rsp_t), + .reg_req_t (dma_reg_req_t), + .reg_rsp_t (dma_reg_rsp_t) + ) i_axi_to_reg ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .testmode_i(testmode_i), + .axi_req_i (axi_slv_req), + .axi_rsp_o (axi_slv_rsp), + .reg_req_o (dma_reg_slv_req), + .reg_rsp_i (dma_reg_slv_rsp) + ); + + assign axi_slv_req = axi_slave_req_i; + assign axi_slave_rsp_o = axi_slv_rsp; + +endmodule diff --git a/src/frontend/desc64/idma_desc64_reader.sv b/src/frontend/desc64/idma_desc64_reader.sv new file mode 100644 index 00000000..d1975bf0 --- /dev/null +++ b/src/frontend/desc64/idma_desc64_reader.sv @@ -0,0 +1,178 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Authors: +// - Axel Vanoni + +`include "common_cells/registers.svh" + +/// This module takes in an AXI R-channel, and reads descriptors from it. +/// Note that an using an address width other than 64 bits will need +/// modifications. 
+module idma_desc64_reader #(
+    /// Address width of the AXI bus
+    parameter int unsigned AddrWidth = 64,
+    /// Data width of the AXI bus
+    parameter int unsigned DataWidth = 64,
+    /// iDMA request type
+    parameter type idma_req_t = logic,
+    /// AXI R channel type
+    parameter type axi_r_chan_t = logic,
+    /// Configuration descriptor type
+    parameter type descriptor_t = logic,
+    /// AXI bus address type, derived from the address width
+    parameter type addr_t = logic [AddrWidth-1:0]
+)(
+    /// clock
+    input logic clk_i,
+    /// reset
+    input logic rst_ni,
+    /// axi read channel
+    input axi_r_chan_t r_chan_i,
+    /// read channel valid
+    input logic r_chan_valid_i,
+    /// read channel ready
+    output logic r_chan_ready_o,
+    /// idma request
+    output idma_req_t idma_req_o,
+    /// idma request valid
+    output logic idma_req_valid_o,
+    /// idma request ready
+    /// NOTE: we assume that if a read was launched,
+    /// the connected fifo has still space left, i.e. this signal is always
+    /// 1 if a request is in-flight. If a request is in-flight and there
+    /// is not enough space in the fifo, we will either stall the bus or
+    /// drop the request.
+    input logic idma_req_ready_i,
+    /// location of the next descriptor address
+    output addr_t next_descriptor_addr_o,
+    /// whether next_descriptor_addr is valid
+    output logic next_descriptor_addr_valid_o,
+    /// whether this descriptor needs an IRQ raised
+    output logic do_irq_o,
+    /// whether do_irq_o is valid
+    output logic do_irq_valid_o,
+    /// whether a request is in-flight
+    output logic idma_req_inflight_o
+);
+
+// Descriptor as assembled from the R channel beats. It is handed to the
+// reshaper below, which extracts the iDMA request, the next-descriptor
+// address and the IRQ flag from it.
+descriptor_t current_descriptor;
+
+// One data path per supported AXI data width. The paths differ in how many
+// beats have to be collected before the descriptor is complete.
+if (DataWidth == 256) begin : gen_256_data_path
+    // The descriptor arrives in a single beat: everything is valid as soon
+    // as the R channel is valid, no state is needed.
+    assign current_descriptor = r_chan_i.data;
+    assign idma_req_valid_o = r_chan_valid_i;
+    assign next_descriptor_addr_valid_o = r_chan_valid_i;
+    assign do_irq_valid_o = r_chan_valid_i;
+    assign idma_req_inflight_o = r_chan_valid_i;
+end else if (DataWidth == 128) begin : gen_128_data_path
+    // Two beats: the first one is registered, the second one is taken
+    // straight from the bus.
+    logic [127:0] first_half_of_descriptor_q, first_half_of_descriptor_d;
+    logic [127:0] second_half_of_descriptor;
+    logic irq_addr_valid_q, irq_addr_valid_d;
+
+    assign idma_req_valid_o = r_chan_valid_i && r_chan_i.last;
+    assign next_descriptor_addr_valid_o = irq_addr_valid_q;
+    assign do_irq_valid_o = irq_addr_valid_q;
+    assign idma_req_inflight_o = r_chan_valid_i || irq_addr_valid_q;
+
+    // FIX: the second half was declared and used but never driven. The
+    // request is only valid on the last beat, whose data is the second half.
+    assign second_half_of_descriptor = r_chan_i.data;
+
+    // FIX: use a plain concatenation like the other data paths. The former
+    // assignment pattern `descriptor_t'{a, b}` supplies two items, which
+    // does not match the members of descriptor_t read by the reshaper.
+    assign current_descriptor = {
+        first_half_of_descriptor_q,
+        second_half_of_descriptor
+    };
+
+    always_comb begin
+        first_half_of_descriptor_d = first_half_of_descriptor_q;
+        // capture every accepted beat that is not the last one
+        if (r_chan_valid_i && r_chan_ready_o && !r_chan_i.last) begin
+            first_half_of_descriptor_d = r_chan_i.data;
+        end
+    end
+
+    always_comb begin
+        // the irq and next address fields are valid
+        // from receiving the first half until the
+        // second half was received
+        irq_addr_valid_d = irq_addr_valid_q;
+        if (r_chan_valid_i && r_chan_ready_o) begin
+            irq_addr_valid_d = !r_chan_i.last;
+        end
+    end
+
+    `FF(first_half_of_descriptor_q, first_half_of_descriptor_d, 128'b0);
+    `FF(irq_addr_valid_q, irq_addr_valid_d, 1'b0);
+end else if (DataWidth == 64) begin : gen_64_data_path
+    // Four beats: three are registered, the last one is taken from the bus.
+    logic [1:0] fetch_counter_q, fetch_counter_d;
+    logic [2:0][63:0] descriptor_data_q, descriptor_data_d;
+    logic [63:0] descriptor_data_last;
+
+    assign idma_req_valid_o = r_chan_valid_i && r_chan_i.last;
+    // valid once one / two beats have been stored, respectively
+    assign do_irq_valid_o = fetch_counter_q == 2'b01;
+    assign next_descriptor_addr_valid_o = fetch_counter_q == 2'b10;
+    assign descriptor_data_last = r_chan_i.data;
+    assign idma_req_inflight_o = fetch_counter_q != 2'b00;
+
+    // the first received word ends up in the most significant position
+    assign current_descriptor = {
+        descriptor_data_q[0],
+        descriptor_data_q[1],
+        descriptor_data_q[2],
+        descriptor_data_last
+    };
+
+    always_comb begin : proc_fetch_data
+        descriptor_data_d = descriptor_data_q;
+        fetch_counter_d = fetch_counter_q;
+        if (r_chan_valid_i && r_chan_ready_o && !r_chan_i.last) begin
+            descriptor_data_d[fetch_counter_q] = r_chan_i.data;
+            fetch_counter_d = fetch_counter_q + 2'b01;
+        end
+        // Independent of the branch above (this is not an `else if`): a
+        // valid last beat restarts the counter for the next descriptor.
+        // NOTE(review): this does not qualify on r_chan_ready_o; it relies
+        // on the assumption documented at idma_req_ready_i.
+        if (r_chan_valid_i && r_chan_i.last) begin
+            fetch_counter_d = 2'b00;
+        end
+    end
+
+    `FF(descriptor_data_q, descriptor_data_d, 192'b0);
+    `FF(fetch_counter_q, fetch_counter_d, 2'b0);
+end else if (DataWidth == 32) begin : gen_32_data_path
+    // Eight beats: seven are registered, the last one is taken from the bus.
+    logic [2:0] fetch_counter_q, fetch_counter_d;
+    logic [6:0][31:0] descriptor_data_q, descriptor_data_d;
+    logic [31:0] descriptor_data_last;
+
+    assign idma_req_valid_o = r_chan_valid_i && r_chan_i.last;
+    // valid once two / four beats have been stored, respectively
+    assign do_irq_valid_o = fetch_counter_q == 3'd2;
+    assign next_descriptor_addr_valid_o = fetch_counter_q == 3'd4;
+    assign descriptor_data_last = r_chan_i.data;
+    assign idma_req_inflight_o = fetch_counter_q != 3'd0;
+
+    // NOTE(review): concatenating the packed array as a whole places
+    // descriptor_data_q[6] in the most significant word, whereas the 64-bit
+    // path places its first received word ([0]) there. Confirm the intended
+    // word order against the layout of descriptor_t.
+    assign current_descriptor = {
+        descriptor_data_q,
+        descriptor_data_last
+    };
+
+    always_comb begin : proc_fetch_data
+        descriptor_data_d = descriptor_data_q;
+        fetch_counter_d = fetch_counter_q;
+        if (r_chan_valid_i && r_chan_ready_o && !r_chan_i.last) begin
+            descriptor_data_d[fetch_counter_q] = r_chan_i.data;
+            fetch_counter_d = fetch_counter_q + 3'b001;
+        end
+        // Independent of the branch above (this is not an `else if`): a
+        // valid last beat restarts the counter for the next descriptor.
+        if (r_chan_valid_i && r_chan_i.last) begin
+            fetch_counter_d = 3'b0;
+        end
+    end
+
+    // FIX: the state registers were missing in this data path, leaving
+    // descriptor_data_q and fetch_counter_q undriven.
+    `FF(descriptor_data_q, descriptor_data_d, 224'b0);
+    `FF(fetch_counter_q, fetch_counter_d, 3'b0);
+end else begin : gen_unsupported_data_path
+    // Without this branch an unsupported width silently elaborates to a
+    // module with undriven outputs.
+    // pragma translate_off
+    initial begin
+        $fatal(1, "idma_desc64_reader: unsupported DataWidth %0d (supported: 32, 64, 128, 256)",
+               DataWidth);
+    end
+    // pragma translate_on
+end
+
+// Purely combinational mapping of the descriptor onto the outputs.
+idma_desc64_reshaper #(
+    .idma_req_t (idma_req_t),
+    .addr_t (addr_t),
+    .descriptor_t(descriptor_t)
+) i_descriptor_reshaper (
+    .descriptor_i (current_descriptor),
+    .idma_req_o,
+    .next_addr_o (next_descriptor_addr_o),
+    .do_irq_o
+);
+
+// The user should take care that the connected fifo always has
+// enough space to put in the new descriptor. If it does not,
+// instead of dropping requests, stall the bus (unless we're
+// dropping this descriptor).
+assign r_chan_ready_o = idma_req_ready_i;
+
+endmodule
diff --git a/src/frontend/desc64/idma_desc64_reader_gater.sv b/src/frontend/desc64/idma_desc64_reader_gater.sv
new file mode 100644
index 00000000..f0ba964f
--- /dev/null
+++ b/src/frontend/desc64/idma_desc64_reader_gater.sv
@@ -0,0 +1,59 @@
+// Copyright 2023 ETH Zurich and University of Bologna.
+// Solderpad Hardware License, Version 0.51, see LICENSE for details.
+// SPDX-License-Identifier: SHL-0.51
+
+// Authors:
+// - Axel Vanoni
+
+`include "common_cells/registers.svh"
+
+/// This module gates the handshake of an AXI R channel so that the beats of
+/// a requested number of read bursts can be flushed: while flushing, beats
+/// are accepted from the bus but hidden from the downstream consumer.
+module idma_desc64_reader_gater #(
+    // Type of the flush counter (number of bursts to discard)
+    parameter type flush_t = logic
+)(
+    input logic clk_i,
+    input logic rst_ni,
+    // Number of bursts to flush; loaded into the counter when
+    // n_to_flush_valid_i is set
+    input flush_t n_to_flush_i,
+    input logic n_to_flush_valid_i,
+    // Incoming valid and its gated copy
+    input logic r_valid_i,
+    output logic r_valid_o,
+    // Incoming ready and its gated copy
+    input logic r_ready_i,
+    output logic r_ready_o,
+    // Marks the last beat of a burst; used to count whole bursts
+    input logic r_last_i
+);
+
+// Remaining number of bursts to flush
+flush_t n_to_flush_q, n_to_flush_d;
+// High while beats are being discarded
+logic flush;
+// Arms the gating; see the process below for when it is set and cleared
+logic engage_q, engage_d;
+
+// Flushing requires the gate to be engaged and either an outstanding count
+// or a non-zero count being loaded in this very cycle.
+assign flush = engage_q && (n_to_flush_q > '0 || (n_to_flush_valid_i && n_to_flush_i > '0));
+
+// engage gating only after the last r transaction is done
+always_comb begin
+    engage_d = engage_q;
+    // A new flush request, or nothing left to flush, disengages the gate ...
+    if (n_to_flush_valid_i || n_to_flush_q == '0) begin
+        engage_d = 1'b0;
+    // ... otherwise it is engaged once a last beat has been handed over
+    // (r_valid_i and r_ready_i both high on r_last_i).
+    end else if (r_last_i && r_valid_i && r_ready_i) begin
+        engage_d = 1'b1;
+    end
+end
+
+always_comb begin
+    n_to_flush_d = n_to_flush_q;
+    // Count down by one for every last beat seen while engaged
+    if (r_last_i && r_valid_i && n_to_flush_q > '0 && engage_q) begin
+        n_to_flush_d = n_to_flush_q - 1'b1;
+    end
+    // Loading a new count takes priority over the decrement above
+    if (n_to_flush_valid_i) begin
+        n_to_flush_d = n_to_flush_i;
+    end
+end
+
+`FF(n_to_flush_q, n_to_flush_d, 'b0);
+`FF(engage_q, engage_d, 'b0);
+
+// While flushing, valid is forced low towards r_valid_o and ready is forced
+// high towards r_ready_o; otherwise both handshake signals pass through.
+assign r_valid_o = flush ? 1'b0 : r_valid_i;
+assign r_ready_o = flush ? 1'b1 : r_ready_i;
+
+endmodule
diff --git a/src/frontend/desc64/idma_desc64_reg_wrapper.sv b/src/frontend/desc64/idma_desc64_reg_wrapper.sv
index 0a1e61af..44ac8ae2 100644
--- a/src/frontend/desc64/idma_desc64_reg_wrapper.sv
+++ b/src/frontend/desc64/idma_desc64_reg_wrapper.sv
@@ -1,9 +1,9 @@
-// Copyright 2022 ETH Zurich and University of Bologna.
+// Copyright 2023 ETH Zurich and University of Bologna.
 // Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51 // Authors: -// - Axel Vanoni +// - Axel Vanoni `include "common_cells/registers.svh" @@ -15,22 +15,22 @@ import idma_desc64_reg_pkg::idma_desc64_hw2reg_t; #( parameter type reg_req_t = logic, parameter type reg_rsp_t = logic ) ( - input logic clk_i , - input logic rst_ni , - input reg_req_t reg_req_i , - output reg_rsp_t reg_rsp_o , - output idma_desc64_reg2hw_t reg2hw_o , - input idma_desc64_hw2reg_t hw2reg_i , - input logic devmode_i , - input logic descriptor_fifo_ready_i, - output logic descriptor_fifo_valid_o + input logic clk_i , + input logic rst_ni , + input reg_req_t reg_req_i , + output reg_rsp_t reg_rsp_o , + output idma_desc64_reg2hw_t reg2hw_o , + input idma_desc64_hw2reg_t hw2reg_i , + input logic devmode_i , + output logic input_addr_valid_o, + input logic input_addr_ready_i ); import idma_desc64_reg_pkg::IDMA_DESC64_DESC_ADDR_OFFSET; reg_req_t request; reg_rsp_t response; - logic descriptor_fifo_valid_q, descriptor_fifo_valid_d; + logic input_addr_valid_q, input_addr_valid_d; idma_desc64_reg_top #( .reg_req_t (reg_req_t), @@ -54,7 +54,7 @@ import idma_desc64_reg_pkg::idma_desc64_hw2reg_t; #( always_comb begin if (reg_req_i.addr == IDMA_DESC64_DESC_ADDR_OFFSET) begin - request.valid = reg_req_i.valid && descriptor_fifo_ready_i; + request.valid = reg_req_i.valid && input_addr_ready_i; end else begin request.valid = reg_req_i.valid; end @@ -63,22 +63,22 @@ import idma_desc64_reg_pkg::idma_desc64_hw2reg_t; #( always_comb begin // only take into account the fifo if a write is going to it if (reg_req_i.addr == IDMA_DESC64_DESC_ADDR_OFFSET) begin - reg_rsp_o.ready = response.ready && descriptor_fifo_ready_i; - descriptor_fifo_valid_o = descriptor_fifo_valid_q; + reg_rsp_o.ready = response.ready && input_addr_ready_i; + input_addr_valid_o = reg2hw_o.desc_addr.qe || input_addr_valid_q; end else begin reg_rsp_o.ready = response.ready; - descriptor_fifo_valid_o = '0; + input_addr_valid_o = '0; end end always_comb begin - 
descriptor_fifo_valid_d = descriptor_fifo_valid_q; - if (reg2hw_o.desc_addr.qe) begin - descriptor_fifo_valid_d = 1'b1; - end else if (descriptor_fifo_ready_i) begin - descriptor_fifo_valid_d = '0; + input_addr_valid_d = input_addr_valid_q; + if (reg2hw_o.desc_addr.qe && !input_addr_ready_i) begin + input_addr_valid_d = 1'b1; + end else if (input_addr_ready_i) begin + input_addr_valid_d = '0; end end - `FF(descriptor_fifo_valid_q, descriptor_fifo_valid_d, '0); + `FF(input_addr_valid_q, input_addr_valid_d, '0); endmodule diff --git a/src/frontend/desc64/idma_desc64_reshaper.sv b/src/frontend/desc64/idma_desc64_reshaper.sv new file mode 100644 index 00000000..77c279f1 --- /dev/null +++ b/src/frontend/desc64/idma_desc64_reshaper.sv @@ -0,0 +1,63 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Authors: +// - Axel Vanoni + +/// This module reshapes the 256 bits of a descriptor into its corresponding +/// iDMA backend request +module idma_desc64_reshaper #( + parameter type idma_req_t = logic, + parameter type addr_t = logic, + parameter type descriptor_t = logic +)( + input descriptor_t descriptor_i, + output idma_req_t idma_req_o, + output addr_t next_addr_o, + output logic do_irq_o +); + +assign next_addr_o = descriptor_i.next; +assign do_irq_o = descriptor_i.flags[0]; + +always_comb begin + idma_req_o = '0; + + idma_req_o.length = descriptor_i.length; + idma_req_o.src_addr = descriptor_i.src_addr; + idma_req_o.dst_addr = descriptor_i.dest_addr; + + // Current backend only supports one ID + idma_req_o.opt.axi_id = descriptor_i.flags[23:16]; + idma_req_o.opt.src.burst = descriptor_i.flags[2:1]; + idma_req_o.opt.src.cache = descriptor_i.flags[11:8]; + // AXI4 does not support locked transactions, use atomics + idma_req_o.opt.src.lock = '0; + // unprivileged, secure, data access + idma_req_o.opt.src.prot = '0; + // not participating in qos + 
idma_req_o.opt.src.qos = '0; + // only one region + idma_req_o.opt.src.region = '0; + idma_req_o.opt.dst.burst = descriptor_i.flags[4:3]; + idma_req_o.opt.dst.cache = descriptor_i.flags[15:12]; + // AXI4 does not support locked transactions, use atomics + idma_req_o.opt.dst.lock = '0; + // unprivileged, secure, data access + idma_req_o.opt.dst.prot = '0; + // not participating in qos + idma_req_o.opt.dst.qos = '0; + // only one region in system + idma_req_o.opt.dst.region = '0; + idma_req_o.opt.beo.decouple_aw = descriptor_i.flags[6]; + idma_req_o.opt.beo.decouple_rw = descriptor_i.flags[5]; + // this frontend currently only supports completely debursting + idma_req_o.opt.beo.src_max_llen = '0; + // this frontend currently only supports completely debursting + idma_req_o.opt.beo.dst_max_llen = '0; + idma_req_o.opt.beo.src_reduce_len = descriptor_i.flags[7]; + idma_req_o.opt.beo.dst_reduce_len = descriptor_i.flags[7]; +end + +endmodule diff --git a/src/frontend/desc64/idma_desc64_shared_counter.sv b/src/frontend/desc64/idma_desc64_shared_counter.sv deleted file mode 100644 index 31d3a7ad..00000000 --- a/src/frontend/desc64/idma_desc64_shared_counter.sv +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2022 ETH Zurich and University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. -// SPDX-License-Identifier: SHL-0.51 - -// Authors: -// - Axel Vanoni - -`include "common_cells/registers.svh" -/// This module allows two domains to share a counter -/// One end can increment the counter, the other can -/// decrement it. 
This can be used as a lightweight -/// FIFO if the only data that would be transmitted is 1 -/// Note that the counter wraps on overflow, but saturates -/// on underflow -module idma_desc64_shared_counter #( - parameter int unsigned CounterWidth = 4 -) ( - input logic clk_i , - input logic rst_ni , - /// Whether the internal counter should increment - input logic increment_i , - /// Whether the internal counter should decrement - input logic decrement_i , - /// Whether the internal counter is above zero - output logic greater_than_zero_o -); - -typedef logic [CounterWidth-1:0] counter_t; - -counter_t counter_d, counter_q; -`FF(counter_q, counter_d, '0); - -assign greater_than_zero_o = counter_q != '0; - -always_comb begin - counter_d = counter_q; - unique casez ({increment_i, decrement_i, counter_q != 0}) - 3'b11?: begin - counter_d = counter_q; - end - 3'b10?: begin - counter_d = counter_q + 1; - end - 3'b011: begin - counter_d = counter_q - 1; - end - 3'b010: begin - // don't underflow - counter_d = counter_q; - end - 3'b00?: begin - counter_d = counter_q; - end - default: ; - endcase -end - -endmodule diff --git a/src/frontend/desc64/idma_desc64_synth.sv b/src/frontend/desc64/idma_desc64_synth.sv index 24a88530..842aff06 100644 --- a/src/frontend/desc64/idma_desc64_synth.sv +++ b/src/frontend/desc64/idma_desc64_synth.sv @@ -1,49 +1,75 @@ -// Copyright 2022 ETH Zurich and University of Bologna. +// Copyright 2023 ETH Zurich and University of Bologna. // Solderpad Hardware License, Version 0.51, see LICENSE for details. 
// SPDX-License-Identifier: SHL-0.51 // Authors: -// - Axel Vanoni +// - Axel Vanoni /// synth wrapper module idma_desc64_synth #( - parameter int unsigned AddrWidth = idma_desc64_synth_pkg::AddrWidth, - parameter type burst_req_t = idma_desc64_synth_pkg::burst_req_t, - parameter type reg_rsp_t = idma_desc64_synth_pkg::reg_rsp_t, - parameter type reg_req_t = idma_desc64_synth_pkg::reg_req_t + parameter int unsigned AddrWidth = idma_desc64_synth_pkg::AddrWidth, + parameter int unsigned DataWidth = idma_desc64_synth_pkg::DataWidth, + parameter int unsigned AxiIdWidth = idma_desc64_synth_pkg::IdWidth, + parameter type idma_req_t = idma_desc64_synth_pkg::idma_req_t, + parameter type idma_rsp_t = idma_desc64_synth_pkg::idma_rsp_t, + parameter type axi_rsp_t = idma_desc64_synth_pkg::axi_rsp_t, + parameter type axi_req_t = idma_desc64_synth_pkg::axi_req_t, + parameter type axi_ar_chan_t = idma_desc64_synth_pkg::axi_ar_chan_t, + parameter type axi_r_chan_t = idma_desc64_synth_pkg::axi_r_chan_t, + parameter type reg_rsp_t = idma_desc64_synth_pkg::reg_rsp_t, + parameter type reg_req_t = idma_desc64_synth_pkg::reg_req_t, + parameter int unsigned InputFifoDepth = idma_desc64_synth_pkg::InputFifoDepth, + parameter int unsigned PendingFifoDepth = idma_desc64_synth_pkg::PendingFifoDepth )( - input logic clk_i, - input logic rst_ni, - output reg_req_t master_req_o, - input reg_rsp_t master_rsp_i, - input reg_req_t slave_req_i, - output reg_rsp_t slave_rsp_o, - output burst_req_t dma_be_req_o, - output logic dma_be_valid_o, - input logic dma_be_ready_i, - input logic dma_be_tx_complete_i, - input logic dma_be_idle_i, - output logic irq_o + input logic clk_i , + input logic rst_ni , + output axi_req_t master_req_o , + input axi_rsp_t master_rsp_i , + input logic [AxiIdWidth-1:0] axi_ar_id_i , + input logic [AxiIdWidth-1:0] axi_aw_id_i , + input reg_req_t slave_req_i , + output reg_rsp_t slave_rsp_o , + output idma_req_t idma_req_o , + output logic idma_req_valid_o, + input logic 
idma_req_ready_i, + input idma_rsp_t idma_rsp_i , + input logic idma_rsp_valid_i, + output logic idma_rsp_ready_o, + input logic idma_busy_i , + output logic irq_o ); - idma_desc64 #( - .AddrWidth ( AddrWidth ), - .burst_req_t ( burst_req_t ), - .reg_rsp_t ( reg_rsp_t ), - .reg_req_t ( reg_req_t ) - ) i_idma_desc64 ( - .clk_i, - .rst_ni, - .master_req_o, - .master_rsp_i, - .slave_req_i, - .slave_rsp_o, - .dma_be_req_o, - .dma_be_valid_o, - .dma_be_ready_i, - .dma_be_tx_complete_i, - .dma_be_idle_i, - .irq_o - ); + idma_desc64_top #( + .AddrWidth ( AddrWidth ), + .DataWidth ( DataWidth ), + .AxiIdWidth ( AxiIdWidth ), + .idma_req_t ( idma_req_t ), + .idma_rsp_t ( idma_rsp_t ), + .axi_req_t ( axi_req_t ), + .axi_rsp_t ( axi_rsp_t ), + .axi_ar_chan_t ( axi_ar_chan_t ), + .axi_r_chan_t ( axi_r_chan_t ), + .reg_req_t ( reg_req_t ), + .reg_rsp_t ( reg_rsp_t ), + .InputFifoDepth ( InputFifoDepth ), + .PendingFifoDepth ( PendingFifoDepth ) + ) i_dma_desc64 ( + .clk_i , + .rst_ni , + .master_req_o , + .master_rsp_i , + .axi_ar_id_i , + .axi_aw_id_i , + .slave_req_i , + .slave_rsp_o , + .idma_req_o , + .idma_req_valid_o, + .idma_req_ready_i, + .idma_rsp_i , + .idma_rsp_valid_i, + .idma_rsp_ready_o, + .idma_busy_i , + .irq_o + ); endmodule diff --git a/src/frontend/desc64/idma_desc64_synth_pkg.sv b/src/frontend/desc64/idma_desc64_synth_pkg.sv index ff075c64..0ec1cf6b 100644 --- a/src/frontend/desc64/idma_desc64_synth_pkg.sv +++ b/src/frontend/desc64/idma_desc64_synth_pkg.sv @@ -1,41 +1,38 @@ -// Copyright 2022 ETH Zurich and University of Bologna. +// Copyright 2023 ETH Zurich and University of Bologna. // Solderpad Hardware License, Version 0.51, see LICENSE for details. 
// SPDX-License-Identifier: SHL-0.51 // Authors: -// - Axel Vanoni +// - Axel Vanoni /// synth package package idma_desc64_synth_pkg; `include "register_interface/typedef.svh" - - localparam int unsigned AddrWidth = 64; - localparam int unsigned DataWidth = 64; - localparam int unsigned StrbWidth = DataWidth / 8; - localparam int unsigned OneDLength = 32; - localparam int unsigned IdWidth = 8; + `include "axi/typedef.svh" + `include "idma/typedef.svh" + + localparam int unsigned AddrWidth = 64; + localparam int unsigned DataWidth = 64; + localparam int unsigned StrbWidth = DataWidth / 8; + localparam int unsigned OneDLength = 32; + localparam int unsigned IdWidth = 8; + localparam int unsigned UserWidth = 1; + localparam int unsigned TFLenWidth = 32; + localparam int unsigned InputFifoDepth = 8; + localparam int unsigned PendingFifoDepth = 8; typedef logic [AddrWidth-1:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [StrbWidth-1:0] strb_t; typedef logic [OneDLength-1:0] length_t; typedef logic [IdWidth-1:0] id_t; + typedef logic [UserWidth-1:0] user_t; + typedef logic [TFLenWidth-1:0] tf_len_t; `REG_BUS_TYPEDEF_ALL(reg, addr_t, data_t, strb_t) - - typedef struct packed { - id_t id; - addr_t src; - addr_t dst; - length_t num_bytes; - axi_pkg::cache_t src_cache; - axi_pkg::cache_t dst_cache; - axi_pkg::burst_t src_burst; - axi_pkg::burst_t dst_burst; - logic decouple_rw; - logic deburst; - logic serialize; - } burst_req_t; + `AXI_TYPEDEF_ALL_CT(axi, axi_req_t, axi_rsp_t, addr_t, id_t, data_t, strb_t, user_t) + `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, id_t, addr_t, tf_len_t) + `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) endpackage diff --git a/src/frontend/desc64/idma_desc64_top.sv b/src/frontend/desc64/idma_desc64_top.sv new file mode 100644 index 00000000..629ed6a2 --- /dev/null +++ b/src/frontend/desc64/idma_desc64_top.sv @@ -0,0 +1,475 @@ +// Copyright 2023 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Authors: +// - Axel Vanoni + +`include "common_cells/registers.svh" +`include "common_cells/assertions.svh" + +/// This module serves as a descriptor-based frontend for the iDMA in the CVA6-core +module idma_desc64_top #( + /// Width of the addresses + parameter int unsigned AddrWidth = 64 , + /// Width of a data item on the AXI bus + parameter int unsigned DataWidth = 64 , + /// Width of an AXI ID + parameter int unsigned AxiIdWidth = 3 , + /// burst request type. See the documentation of the idma backend for details + parameter type idma_req_t = logic, + /// burst response type. See the documentation of the idma backend for details + parameter type idma_rsp_t = logic, + /// regbus interface types. Use the REG_BUS_TYPEDEF macros to define the types + /// or see the idma backend documentation for more details + parameter type reg_rsp_t = logic, + parameter type reg_req_t = logic, + /// AXI interface types used for fetching descriptors. + /// Use the AXI_TYPEDEF_ALL macros to define the types + parameter type axi_rsp_t = logic, + parameter type axi_req_t = logic, + parameter type axi_ar_chan_t = logic, + parameter type axi_r_chan_t = logic, + /// Specifies the depth of the fifo behind the descriptor address register + parameter int unsigned InputFifoDepth = 8, + /// Specifies the buffer size of the fifo that tracks requests submitted to the backend + parameter int unsigned PendingFifoDepth = 8, + /// How many requests the backend might have at the same time in its buffers. 
+ /// Usually, `NumAxInFlight + BufferDepth` + parameter int unsigned BackendDepth = 0, + /// Specifies how many descriptors may be fetched speculatively + parameter int unsigned NSpeculation = 4 +)( + /// clock + input logic clk_i , + /// reset + input logic rst_ni , + + /// axi interface used for fetching descriptors + /// master pair + /// master request + output axi_req_t master_req_o , + /// master response + input axi_rsp_t master_rsp_i , + /// ID to be used by the read channel + input logic [AxiIdWidth-1:0] axi_ar_id_i , + /// ID to be used by the write channel + input logic [AxiIdWidth-1:0] axi_aw_id_i , + /// regbus interface + /// slave pair + /// The slave interface exposes two registers: One address register to + /// write a descriptor address to process and a status register that + /// exposes whether the DMA is busy on bit 0 and whether FIFOs are full + /// on bit 1. + /// slave request + input reg_req_t slave_req_i , + /// slave response + output reg_rsp_t slave_rsp_o , + + /// backend interface + /// burst request submission + /// burst request data. See iDMA backend documentation for fields + output idma_req_t idma_req_o , + /// valid signal for the backend data submission + output logic idma_req_valid_o, + /// ready signal for the backend data submission + input logic idma_req_ready_i, + /// status information from the backend + input idma_rsp_t idma_rsp_i , + /// valid signal for the backend response + input logic idma_rsp_valid_i, + /// ready signal for the backend response + output logic idma_rsp_ready_o, + /// whether the backend is currently busy + input logic idma_busy_i , + + /// Event: irq + output logic irq_o +); + +/// Specifies how many unsent AWs/Ws are allowed +localparam int unsigned MaxAWWPending = BackendDepth; + +typedef logic [AddrWidth-1:0] addr_t; + +/// Descriptor layout +typedef struct packed { + /// Flags for this request. 
Currently, the following are defined: + /// bit 0 set to trigger an irq on completion, unset to not be notified + /// bits 2:1 burst type for source, fixed: 00, incr: 01, wrap: 10 + /// bits 4:3 burst type for destination, fixed: 00, incr: 01, wrap: 10 + /// for a description of these modes, check AXI-Pulp documentation + /// bit 5 set to decouple reads and writes in the backend + /// bit 6 set to serialize requests. Not setting might violate AXI spec + /// bit 7 set to deburst (each burst is split into own transfer) + /// for a more thorough description, refer to the iDMA backend documentation + /// bits 11:8 Bitfield for AXI cache attributes for the source + /// bits 15:12 Bitfield for AXI cache attributes for the destination + /// bits of the bitfield (refer to AXI-Pulp for a description): + /// bit 0: cache bufferable + /// bit 1: cache modifiable + /// bit 2: cache read alloc + /// bit 3: cache write alloc + /// bits 23:16 AXI ID used for the transfer + /// bits 31:24 unused/reserved + logic [31:0] flags; + /// length of request in bytes + logic [31:0] length; + /// address of next descriptor, 0xFFFF_FFFF_FFFF_FFFF for last descriptor in chain + addr_t next; + /// source address to copy from + addr_t src_addr; + /// destination address to copy to + addr_t dest_addr; +} descriptor_t; + +idma_req_t idma_req; +logic idma_req_valid; +logic idma_req_ready; +logic idma_req_inflight; +logic gated_r_valid, gated_r_ready; + +logic do_irq; +logic do_irq_valid; +logic do_irq_ready; + +addr_t queued_addr; +logic queued_addr_valid; +logic queued_addr_ready; +addr_t next_addr_from_desc; +logic next_addr_from_desc_valid; +logic ar_busy; +addr_t feedback_addr; +logic feedback_addr_valid; +logic feedback_addr_ready; +addr_t next_wb_addr; +logic next_wb_addr_valid; +logic next_wb_addr_ready; + +`define MAX(a, b) ((a) > (b) ? 
(a) : (b)) + +localparam int unsigned PendingFifoDepthBits = `MAX($clog2(PendingFifoDepth), 1); // at least one bit, even when $clog2 yields 0 + +logic [PendingFifoDepthBits-1:0] idma_req_used; +logic [PendingFifoDepthBits:0] idma_req_available; + +logic [1:0] ws_per_writeback; +// one bit extra for the 32 bit case +logic [$clog2(MaxAWWPending):0] w_counter_q, w_counter_d; +logic aw_tx; +logic w_tx; + +addr_t input_addr; +logic input_addr_valid, input_addr_ready; + +logic do_irq_out; + +idma_desc64_reg_pkg::idma_desc64_reg2hw_t reg2hw; +idma_desc64_reg_pkg::idma_desc64_hw2reg_t hw2reg; + +addr_t aw_addr; + +always_comb begin : proc_available + idma_req_available = PendingFifoDepth - idma_req_used - idma_req_inflight; + if (idma_req_used == '0) begin + if (idma_req_ready) begin + idma_req_available = PendingFifoDepth - idma_req_inflight; + end else begin + idma_req_available = '0; + end + end else if (idma_req_used == PendingFifoDepth && idma_req_inflight) begin + idma_req_available = '0; + end +end + +always_comb begin : proc_aw + master_req_o.aw = '0; + master_req_o.aw.id = axi_aw_id_i; + master_req_o.aw.addr = aw_addr; + master_req_o.aw.size = (DataWidth == 32) ? 3'b010 : 3'b011; + master_req_o.aw.len = (DataWidth == 32) ? 
'b1 : 'b0; +end + +assign master_req_o.w_valid = w_counter_q > 0; +assign aw_tx = master_req_o.aw_valid && master_rsp_i.aw_ready; +assign w_tx = master_req_o.w_valid && master_rsp_i.w_ready; + +always_comb begin : proc_w_counter + w_counter_d = w_counter_q; + if (aw_tx && w_tx) begin + w_counter_d = w_counter_q + ws_per_writeback - 'b1; + end else if (aw_tx) begin + w_counter_d = w_counter_q + ws_per_writeback; + end else if (w_tx) begin + w_counter_d = w_counter_q - 'b1; + end +end + +if (DataWidth == 32) begin : gen_aw_w_chan_32 + logic w_is_last_q, w_is_last_d; + assign ws_per_writeback = 2'd2; + // writeback is 64 bits, so toggle last after sending one word + always_comb begin : proc_is_last + w_is_last_d = w_is_last_q; + if (master_req_o.w_valid && master_rsp_i.w_ready) begin + w_is_last_d = !w_is_last_q; + end + end + + always_comb begin : proc_w + master_req_o.w = '0; + master_req_o.w.data = '1; + master_req_o.w.strb = 4'hf; + master_req_o.w.last = w_is_last_q; + end + `FF(w_is_last_q, w_is_last_d, 1'b0) +end else begin : gen_aw_w_chan + assign ws_per_writeback = 2'd1; + always_comb begin : proc_w + master_req_o.w = '0; + master_req_o.w.data = '0; + master_req_o.w.data[63:0] = 64'hffff_ffff_ffff_ffff; + master_req_o.w.strb = 'hff; + master_req_o.w.last = 1'b1; + end +end + +assign hw2reg.status.busy.d = queued_addr_valid || + next_wb_addr_valid || + idma_req_valid_o || + master_req_o.b_ready || + master_req_o.aw_valid || + w_counter_q > 0 || + idma_busy_i || + ar_busy; + +assign hw2reg.status.busy.de = 1'b1; +assign hw2reg.status.fifo_full.d = !input_addr_ready; +assign hw2reg.status.fifo_full.de = 1'b1; + +assign input_addr = reg2hw.desc_addr.q; + +idma_desc64_reg_wrapper #( + .reg_req_t(reg_req_t), + .reg_rsp_t(reg_rsp_t) +) i_reg_wrapper ( + .clk_i, + .rst_ni, + .reg_req_i (slave_req_i), + .reg_rsp_o (slave_rsp_o), + .reg2hw_o (reg2hw), + .hw2reg_i (hw2reg), + .devmode_i (1'b0), + .input_addr_valid_o (input_addr_valid), + .input_addr_ready_i 
(input_addr_ready) +); + +if (NSpeculation == 0) begin : gen_no_spec + +assign master_req_o.r_ready = gated_r_ready; +assign gated_r_valid = master_rsp_i.r_valid; +idma_desc64_ar_gen #( + .DataWidth (DataWidth), + .descriptor_t (descriptor_t), + .axi_ar_chan_t(axi_ar_chan_t), + .axi_id_t (logic [AxiIdWidth-1:0]), + .usage_t (logic [$bits(idma_req_available)-1:0]), + .addr_t (addr_t) +) i_ar_gen ( + .clk_i, + .rst_ni, + .axi_ar_chan_o (master_req_o.ar), + .axi_ar_chan_valid_o (master_req_o.ar_valid), + .axi_ar_chan_ready_i (master_rsp_i.ar_ready), + .axi_ar_id_i, + .queued_address_i (queued_addr), + .queued_address_valid_i (queued_addr_valid), + .queued_address_ready_o (queued_addr_ready), + .next_address_from_descriptor_i (next_addr_from_desc), + .next_address_from_descriptor_valid_i(next_addr_from_desc_valid), + .idma_req_available_slots_i (idma_req_available), + .feedback_addr_o (feedback_addr), + .feedback_addr_valid_o (feedback_addr_valid), + .busy_o (ar_busy) +); +end else begin : gen_spec + +typedef logic [$clog2(NSpeculation + 1)-1:0] flush_t; + +flush_t n_requests_to_flush; +logic n_requests_to_flush_valid; + + +idma_desc64_ar_gen_prefetch #( + .DataWidth (DataWidth), + .NSpeculation (NSpeculation), + .descriptor_t (descriptor_t), + .axi_ar_chan_t(axi_ar_chan_t), + .axi_id_t (logic [AxiIdWidth-1:0]), + .usage_t (logic [$bits(idma_req_available)-1:0]), + .addr_t (addr_t), + .flush_t (flush_t) +) i_ar_gen ( + .clk_i, + .rst_ni, + .axi_ar_chan_o (master_req_o.ar), + .axi_ar_chan_valid_o (master_req_o.ar_valid), + .axi_ar_chan_ready_i (master_rsp_i.ar_ready), + .axi_ar_id_i, + .queued_address_i (queued_addr), + .queued_address_valid_i (queued_addr_valid), + .queued_address_ready_o (queued_addr_ready), + .next_address_from_descriptor_i (next_addr_from_desc), + .next_address_from_descriptor_valid_i(next_addr_from_desc_valid), + .idma_req_available_slots_i (idma_req_available), + .n_requests_to_flush_o (n_requests_to_flush), + .n_requests_to_flush_valid_o 
(n_requests_to_flush_valid), + .feedback_addr_o (feedback_addr), + .feedback_addr_valid_o (feedback_addr_valid), + .busy_o (ar_busy) +); + +idma_desc64_reader_gater #( + .flush_t(flush_t) +) i_reader_gater ( + .clk_i, + .rst_ni, + .n_to_flush_i (n_requests_to_flush), + .n_to_flush_valid_i(n_requests_to_flush_valid), + .r_valid_i (master_rsp_i.r_valid), + .r_ready_o (master_req_o.r_ready), + .r_valid_o (gated_r_valid), + .r_ready_i (gated_r_ready), + .r_last_i (master_rsp_i.r.last) +); + + +end + +idma_desc64_reader #( + .AddrWidth (AddrWidth), + .DataWidth (DataWidth), + .idma_req_t (idma_req_t), + .descriptor_t(descriptor_t), + .axi_r_chan_t(axi_r_chan_t) +) i_reader ( + .clk_i, + .rst_ni, + .r_chan_i (master_rsp_i.r), + .r_chan_valid_i (gated_r_valid), + .r_chan_ready_o (gated_r_ready), + .idma_req_o (idma_req), + .idma_req_valid_o (idma_req_valid), + .idma_req_ready_i (idma_req_ready), + .next_descriptor_addr_o (next_addr_from_desc), + .next_descriptor_addr_valid_o(next_addr_from_desc_valid), + .do_irq_o (do_irq), + .do_irq_valid_o (do_irq_valid), + .idma_req_inflight_o (idma_req_inflight) +); + +stream_fifo #( + .FALL_THROUGH (1'b1), + .DEPTH (InputFifoDepth), + .T (addr_t) +) i_input_addr_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (/* unconnected */), + .data_i (input_addr), + .valid_i (input_addr_valid), + .ready_o (input_addr_ready), + .data_o (queued_addr), + .valid_o (queued_addr_valid), + .ready_i (queued_addr_ready) +); + +stream_fifo #( + .FALL_THROUGH (1'b1), + .DEPTH (PendingFifoDepth + BackendDepth), + .T (addr_t) +) i_pending_addr_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (/* unconnected */), + .data_i (feedback_addr), + .valid_i (feedback_addr_valid), + .ready_o (feedback_addr_ready), + .data_o (next_wb_addr), + .valid_o (next_wb_addr_valid), + .ready_i (next_wb_addr_ready && idma_rsp_valid_i) +); + +stream_fifo #( + .FALL_THROUGH (1'b0), + .DEPTH (PendingFifoDepth), + .T 
(idma_req_t) +) i_idma_request_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (idma_req_used), + .data_i (idma_req), + .valid_i (idma_req_valid), + .ready_o (idma_req_ready), + .data_o (idma_req_o), + .valid_o (idma_req_valid_o), + .ready_i (idma_req_ready_i) +); + +stream_fifo #( + .FALL_THROUGH (1'b0), + .DEPTH (PendingFifoDepth + MaxAWWPending + BackendDepth), + .T (logic) +) i_irq_fifo ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (/* unconnected */), + .data_i (do_irq), + .valid_i (do_irq_valid), + .ready_o (do_irq_ready), + .data_o (do_irq_out), + .valid_o (master_req_o.b_ready), + .ready_i (master_rsp_i.b_valid) +); + +stream_fifo #( + .FALL_THROUGH (1'b0), + .DEPTH (MaxAWWPending), + .T (addr_t) +) i_aw_addrs ( + .clk_i, + .rst_ni, + .flush_i (1'b0), + .testmode_i(1'b0), + .usage_o (/* unconnected */), + .data_i (next_wb_addr), + .valid_i (next_wb_addr_valid && idma_rsp_valid_i), + .ready_o (next_wb_addr_ready), + .data_o (aw_addr), + .valid_o (master_req_o.aw_valid), + .ready_i (master_rsp_i.aw_ready) +); + +`FF(w_counter_q, w_counter_d, '0); + + +assign idma_rsp_ready_o = next_wb_addr_ready && next_wb_addr_valid; +assign irq_o = do_irq_out && master_req_o.b_ready && master_rsp_i.b_valid; + +// The three fifos for idma_req, irqs and feedback addresses must fill +// and empty in lockstep. Capacity is tested at the idma_req fifo, the +// other two ready signals are ignored. 
+// pragma translate_off +`ASSERT_IF(NoIrqDropped, do_irq_ready, do_irq_valid); +`ASSERT_IF(NoAddrDropped, feedback_addr_ready, feedback_addr_valid); +// pragma translate_on + +endmodule diff --git a/src/systems/cva6_desc/dma_reg_to_axi.sv b/src/future/idma_reg_to_axi.sv similarity index 96% rename from src/systems/cva6_desc/dma_reg_to_axi.sv rename to src/future/idma_reg_to_axi.sv index a66aeab3..3a836a3b 100644 --- a/src/systems/cva6_desc/dma_reg_to_axi.sv +++ b/src/future/idma_reg_to_axi.sv @@ -3,13 +3,13 @@ // SPDX-License-Identifier: SHL-0.51 // Authors: -// - Axel Vanoni +// - Axel Vanoni `include "common_cells/registers.svh" `include "idma/guard.svh" /// Hacky register interface to AXI converter -module dma_reg_to_axi #( +module idma_reg_to_axi #( parameter type axi_req_t = logic, parameter type axi_rsp_t = logic, parameter type reg_req_t = logic, @@ -92,7 +92,7 @@ module dma_reg_to_axi #( /* check that we don't get any errors in the simulation */ `IDMA_NONSYNTH_BLOCK( - assert property (@(posedge clk_i) (axi_rsp_i.r_valid && axi_req_o.r_ready) |-> \ + assert property (@(posedge clk_i) (axi_rsp_i.r_valid && axi_req_o.r_ready) |-> (axi_rsp_i.r.resp == axi_pkg::RESP_OKAY)); ) @@ -102,4 +102,4 @@ module dma_reg_to_axi #( `FF(a_acked_q, a_acked_d, '0); `FF(d_acked_q, d_acked_d, '0); -endmodule : dma_reg_to_axi +endmodule diff --git a/src/include/idma/tracer.svh b/src/include/idma/tracer.svh new file mode 100644 index 00000000..d8f6740d --- /dev/null +++ b/src/include/idma/tracer.svh @@ -0,0 +1,101 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Thomas Benz + +// Macro holding all the resources for the iDMA backend tracer +`ifndef IDMA_TRACER_SVH_ +`define IDMA_TRACER_SVH_ + +// largest type to trace +`define IDMA_TRACER_MAX_TYPE_WIDTH 1024 +`define IDMA_TRACER_MAX_TYPE logic [`IDMA_TRACER_MAX_TYPE_WIDTH-1:0] + +// string assembly function +`define IDMA_TRACER_STR_ASSEMBLY(__dict, __cond) \ + if(__cond) begin \ + trace = $sformatf("%s'%s':{", trace, `"__dict`"); \ + foreach(__dict``[key]) trace = $sformatf("%s'%s': 0x%0x,", trace, key, __dict``[key]); \ + trace = $sformatf("%s},", trace); \ + end + +// helper to clear a condition +`define IDMA_TRACER_CLEAR_COND(__cond) \ + if(__cond) begin \ + __cond = ~__cond; \ + end + +// The tracer for the iDMA (simulation only: excluded when SYNTHESIS or VERILATOR is defined) +`define IDMA_TRACER(__backend_inst, __out_f_name) \ +`ifndef SYNTHESIS \ +`ifndef VERILATOR \ + initial begin : inital_tracer \ + automatic bit first_iter = 1; \ + automatic integer tf; \ + automatic `IDMA_TRACER_MAX_TYPE cnst [string]; \ + automatic `IDMA_TRACER_MAX_TYPE meta [string]; \ + automatic `IDMA_TRACER_MAX_TYPE busy [string]; \ + automatic `IDMA_TRACER_MAX_TYPE axib [string]; \ + automatic string trace; \ + #0; \ + tf = $fopen(__out_f_name, "w"); \ + $display("[Tracer] Logging iDMA backend %s to %s", `"__backend_inst`", __out_f_name); \ + forever begin \ + @(posedge __backend_inst``.clk_i); \ + if(__backend_inst``.rst_ni & |__backend_inst``.busy_o) begin \ + /* Trace */ \ + trace = "{"; \ + /* Constants */ \ + cnst = '{ \ + "inst" : `"__backend_inst`", \ + "data_width" : __backend_inst``.DataWidth, \ + "addr_width" : __backend_inst``.AddrWidth, \ + "user_width" : __backend_inst``.UserWidth, \ + "axi_id_width" : __backend_inst``.AxiIdWidth, \ + "num_ax_in_flight" : __backend_inst``.NumAxInFlight, \ + "buffer_depth" : __backend_inst``.BufferDepth, \ + "tf_len_width" : __backend_inst``.TFLenWidth, \ + "mem_sys_depth" : __backend_inst``.MemSysDepth, \ + 
"rw_coupling_avail" : __backend_inst``.RAWCouplingAvail, 
\ + "mask_invalid_data" : __backend_inst``.MaskInvalidData, \ + "hardware_legalizer" : __backend_inst``.HardwareLegalizer, \ + "reject_zero_transfers" : __backend_inst``.RejectZeroTransfers, \ + "error_cap" : __backend_inst``.ErrorCap, \ + "print_fifo_info" : __backend_inst``.PrintFifoInfo \ + }; \ + meta = '{ \ + "time" : $time() \ + }; \ + busy = '{ \ + "buffer" : __backend_inst``.busy_o.buffer_busy, \ + "r_dp" : __backend_inst``.busy_o.r_dp_busy, \ + "w_dp" : __backend_inst``.busy_o.w_dp_busy, \ + "r_leg" : __backend_inst``.busy_o.r_leg_busy, \ + "w_leg" : __backend_inst``.busy_o.w_leg_busy, \ + "eh_fsm" : __backend_inst``.busy_o.eh_fsm_busy, \ + "eh_cnt" : __backend_inst``.busy_o.eh_cnt_busy, \ + "raw_coupler" : __backend_inst``.busy_o.raw_coupler_busy \ + }; \ + axib = '{ \ + "w_valid" : __backend_inst``.axi_req_o.w_valid, \ + "w_ready" : __backend_inst``.axi_rsp_i.w_ready, \ + "w_strb" : __backend_inst``.axi_req_o.w.strb, \ + "r_valid" : __backend_inst``.axi_rsp_i.r_valid, \ + "r_ready" : __backend_inst``.axi_req_o.r_ready \ + }; \ + /* Assembly */ \ + `IDMA_TRACER_STR_ASSEMBLY(cnst, first_iter); \ + `IDMA_TRACER_STR_ASSEMBLY(meta, 1); \ + `IDMA_TRACER_STR_ASSEMBLY(busy, 1); \ + `IDMA_TRACER_STR_ASSEMBLY(axib, 1); \ + `IDMA_TRACER_CLEAR_COND(first_iter); \ + /* Commit */ \ + $fwrite(tf, $sformatf("%s}\n", trace)); \ + end \ + end \ + end \ +`endif \ +`endif + +`endif diff --git a/src/systems/cva6_desc/dma_desc_wrap.sv b/src/systems/cva6_desc/dma_desc_wrap.sv deleted file mode 100644 index de1441ff..00000000 --- a/src/systems/cva6_desc/dma_desc_wrap.sv +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2022 ETH Zurich and University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
-// SPDX-License-Identifier: SHL-0.51 - -// Authors: -// - Axel Vanoni - -`include "axi/assign.svh" -`include "axi/typedef.svh" -`include "idma/typedef.svh" -`include "register_interface/typedef.svh" -`include "common_cells/registers.svh" - -/// Wrapper for the iDMA -module dma_desc_wrap #( - parameter int AxiAddrWidth = 64, - parameter int AxiDataWidth = 64, - parameter int AxiUserWidth = -1, - parameter int AxiIdWidth = -1, - parameter int AxiSlvIdWidth = -1, - parameter type mst_aw_chan_t = logic, // AW Channel Type, master port - parameter type mst_w_chan_t = logic, // W Channel Type, all ports - parameter type mst_b_chan_t = logic, // B Channel Type, master port - parameter type mst_ar_chan_t = logic, // AR Channel Type, master port - parameter type mst_r_chan_t = logic, // R Channel Type, master port - parameter type axi_mst_req_t = logic, - parameter type axi_mst_rsp_t = logic, - parameter type axi_slv_req_t = logic, - parameter type axi_slv_rsp_t = logic -) ( - input logic clk_i, - input logic rst_ni, - input logic testmode_i, - output logic irq_o, - output axi_mst_req_t axi_master_req_o, - input axi_mst_rsp_t axi_master_rsp_i, - input axi_slv_req_t axi_slave_req_i, - output axi_slv_rsp_t axi_slave_rsp_o -); - import axi_pkg::*; - - typedef logic [AxiAddrWidth-1:0] addr_t; - typedef logic [AxiDataWidth-1:0] data_t; - typedef logic [(AxiDataWidth/8)-1:0] strb_t; - typedef logic [AxiUserWidth-1:0] user_t; - // has one less bit for the mux not to error - typedef logic [AxiIdWidth-2:0] post_mux_id_t; - - axi_slv_req_t axi_slv_req; - axi_slv_rsp_t axi_slv_rsp; - - `AXI_TYPEDEF_ALL(dma_axi_mst_post_mux, addr_t, post_mux_id_t, data_t, strb_t, user_t) - dma_axi_mst_post_mux_req_t axi_fe_mst_req; - dma_axi_mst_post_mux_resp_t axi_fe_mst_rsp; - dma_axi_mst_post_mux_req_t axi_be_mst_req; - dma_axi_mst_post_mux_resp_t axi_be_mst_rsp; - - `REG_BUS_TYPEDEF_ALL(dma_reg, addr_t, data_t, strb_t) - dma_reg_req_t dma_reg_mst_req; - dma_reg_rsp_t dma_reg_mst_rsp; - 
dma_reg_req_t dma_reg_slv_req; - dma_reg_rsp_t dma_reg_slv_rsp; - - // iDMA struct definitions - localparam int unsigned TFLenWidth = 32; - typedef logic [TFLenWidth-1:0] tf_len_t; - typedef logic [RepWidth-1:0] reps_t; - typedef logic [StrideWidth-1:0] strides_t; - - // iDMA request / response types - `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, post_mux_id_t, addr_t, tf_len_t) - `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) - - burst_req_t dma_be_req; - logic dma_be_tx_complete; - logic dma_be_valid; - logic dma_be_ready; - idma_pkg::idma_busy_t idma_busy; - - idma_desc64_top #( - .AddrWidth (AxiAddrWidth) , - .burst_req_t(burst_req_t) , - .reg_req_t (dma_reg_req_t), - .reg_rsp_t (dma_reg_rsp_t) - ) i_dma_desc64 ( - .clk_i, - .rst_ni, - .master_req_o ( dma_reg_mst_req ), - .master_rsp_i ( dma_reg_mst_rsp ), - .slave_req_i ( dma_reg_slv_req ), - .slave_rsp_o ( dma_reg_slv_rsp ), - .dma_be_tx_complete_i ( dma_be_tx_complete ), - .dma_be_idle_i ( ~|idma_busy ), - .dma_be_valid_o ( dma_be_valid ), - .dma_be_ready_i ( dma_be_ready ), - .dma_be_req_o ( dma_be_req ), - .irq_o ( irq_o ) - ); - - idma_backend #( - .DataWidth ( AxiDataWidth ), - .AddrWidth ( AxiAddrWidth ), - .UserWidth ( AxiUserWidth ), - .AxiIdWidth ( AxiIdWidth-1 ), - .NumAxInFlight ( 2 ), - .BufferDepth ( 3 ), - .TFLenWidth ( TFLenWidth ), - .RAWCouplingAvail ( 1'b1 ), - .MaskInvalidData ( 1'b1 ), - .HardwareLegalizer ( 1'b1 ), - .RejectZeroTransfers ( 1'b1 ), - .MemSysDepth ( 32'd0 ), - .ErrorCap ( idma_pkg::NO_ERROR_HANDLING ), - .idma_req_t ( idma_req_t ), - .idma_rsp_t ( idma_rsp_t ), - .idma_eh_req_t ( idma_pkg::idma_eh_req_t ), - .idma_busy_t ( idma_pkg::idma_busy_t ), - .axi_req_t ( axi_slv_req_t ), - .axi_rsp_t ( axi_slv_resp_t ) - ) i_idma_backend ( - .clk_i, - .rst_ni, - .testmode_i ( testmode_i ), - - .idma_req_i ( dma_be_req ), - .req_valid_i ( dma_be_valid ), - .req_ready_o ( dma_be_ready ), - - .idma_rsp_o ( /*NOT CONNECTED*/ ), - .rsp_valid_o ( dma_be_tx_complete ), - .rsp_ready_i ( 1'b1 ), - - 
.idma_eh_req_i ( '0 ), // No error handling - .eh_req_valid_i( 1'b1 ), - .eh_req_ready_o( /*NOT CONNECTED*/ ), - - .axi_req_o ( axi_be_mst_req ), - .axi_rsp_i ( axi_be_mst_rsp ), - .busy_o ( idma_busy ) - ); - - axi_mux #( - .SlvAxiIDWidth(AxiIdWidth - 1), - .slv_aw_chan_t(dma_axi_mst_post_mux_aw_chan_t), - .mst_aw_chan_t(mst_aw_chan_t), - .w_chan_t (mst_w_chan_t), // same channel type for master+slave - .slv_b_chan_t (dma_axi_mst_post_mux_b_chan_t), - .mst_b_chan_t (mst_b_chan_t), - .slv_ar_chan_t(dma_axi_mst_post_mux_ar_chan_t), - .mst_ar_chan_t(mst_ar_chan_t), - .slv_r_chan_t (dma_axi_mst_post_mux_r_chan_t), - .mst_r_chan_t (mst_r_chan_t), - .slv_req_t (dma_axi_mst_post_mux_req_t), - .slv_resp_t (dma_axi_mst_post_mux_resp_t), - .mst_req_t (axi_mst_req_t), - .mst_resp_t (axi_mst_rsp_t), - .NoSlvPorts ('d2), - .MaxWTrans ('d2), - .FallThrough ('0), - .SpillAw ('b0), - .SpillW ('0), - .SpillB ('0), - .SpillAr ('b0), - .SpillR ('0) - ) i_axi_mux ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .test_i (1'b0), - .slv_reqs_i ({axi_fe_mst_req, axi_be_mst_req}), - .slv_resps_o ({axi_fe_mst_rsp, axi_be_mst_rsp}), - .mst_req_o (axi_master_req_o), - .mst_resp_i (axi_master_rsp_i) - ); - - axi_to_reg #( - .ADDR_WIDTH (AxiAddrWidth), - .DATA_WIDTH (AxiDataWidth), - .ID_WIDTH (AxiSlvIdWidth), - .USER_WIDTH (AxiUserWidth), - .AXI_MAX_WRITE_TXNS(32'd1), - .AXI_MAX_READ_TXNS (32'd1), - .DECOUPLE_W (1'b1), - .axi_req_t (axi_slv_req_t), - .axi_rsp_t (axi_slv_rsp_t), - .reg_req_t (dma_reg_req_t), - .reg_rsp_t (dma_reg_rsp_t) - ) i_axi_to_reg ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .testmode_i(1'b0), - .axi_req_i (axi_slv_req), - .axi_rsp_o (axi_slv_rsp), - .reg_req_o (dma_reg_slv_req), - .reg_rsp_i (dma_reg_slv_rsp) - ); - - dma_reg_to_axi #( - .axi_req_t (dma_axi_mst_post_mux_req_t), - .axi_rsp_t (dma_axi_mst_post_mux_resp_t), - .reg_req_t (dma_reg_req_t), - .reg_rsp_t (dma_reg_rsp_t), - .ByteWidthInPowersOfTwo($clog2(AxiDataWidth / 8)) - ) i_dma_reg_to_axi ( - .clk_i (clk_i), - 
.rst_ni (rst_ni), - .axi_req_o(axi_fe_mst_req), - .axi_rsp_i(axi_fe_mst_rsp), - .reg_req_i(dma_reg_mst_req), - .reg_rsp_o(dma_reg_mst_rsp) - ); - - assign axi_slv_req = axi_slave_req_i; - assign axi_slave_rsp_o = axi_slv_rsp; - -endmodule : dma_desc_wrap diff --git a/test/frontend/tb_idma_desc64.sv b/test/frontend/tb_idma_desc64.sv deleted file mode 100644 index 4d10f478..00000000 --- a/test/frontend/tb_idma_desc64.sv +++ /dev/null @@ -1,593 +0,0 @@ -// Copyright 2022 ETH Zurich and University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. -// SPDX-License-Identifier: SHL-0.51 - -// Authors: -// - Axel Vanoni - -`include "register_interface/typedef.svh" -`include "register_interface/assign.svh" -`include "idma/typedef.svh" - -import idma_desc64_reg_pkg::IDMA_DESC64_DESC_ADDR_OFFSET; -import idma_desc64_reg_pkg::IDMA_DESC64_STATUS_OFFSET; -import rand_verif_pkg::rand_wait; -import axi_pkg::*; -import reg_test::reg_driver; - -module tb_idma_desc64 #( - parameter integer NumberOfTests = 100, - parameter integer SimulationTimeoutCycles = 100000, - parameter integer MaxChainedDescriptors = 10, - parameter integer MinChainedDescriptors = 1 - -) (); - localparam time PERIOD = 10ns; - localparam time APPL_DELAY = PERIOD / 4; - localparam time ACQ_DELAY = PERIOD * 3 / 4; - - localparam integer RESET_CYCLES = 10; - - `REG_BUS_TYPEDEF_ALL(reg, /* addr */ logic [63:0], /* data */ logic [63:0], /* strobe */ logic [7:0]) - - typedef logic [63:0] addr_t; - typedef logic [ 2:0] axi_id_t; - - // iDMA struct definitions - localparam int unsigned TFLenWidth = 32; - typedef logic [TFLenWidth-1:0] tf_len_t; - - // iDMA request / response types - `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, axi_id_t, addr_t, tf_len_t) - `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) - - class stimulus_t; - rand addr_t base; - rand idma_req_t burst; - rand logic do_irq; - addr_t next = ~64'b0; - - // an entire descriptor of 4 words must fit before the end of memory - constraint 
descriptor_fits_in_memory { ~64'b0 - base > 32; } - constraint no_empty_transfers { burst.length > 0; } - constraint src_fits_in_memory { ~64'b0 - burst.src_addr > burst.length; } - constraint dst_fits_in_memory { ~64'b0 - burst.dst_addr > burst.length; } - constraint src_burst_valid { burst.opt.src.burst inside { BURST_INCR, BURST_WRAP, BURST_FIXED }; } - constraint dst_burst_valid { burst.opt.dst.burst inside { BURST_INCR, BURST_WRAP, BURST_FIXED }; } - constraint reduce_len_equal { burst.opt.beo.src_reduce_len == burst.opt.beo.dst_reduce_len; } - constraint beo_zero { burst.opt.beo.decouple_aw == '0 && burst.opt.beo.src_max_llen == '0 && burst.opt.beo.dst_max_llen == '0 && burst.opt.last == '0; } - constraint axi_params_zero_src { burst.opt.src.lock == '0 && burst.opt.src.prot == '0 && burst.opt.src.qos == '0 && burst.opt.src.region == '0; } - constraint axi_params_zero_dst { burst.opt.dst.lock == '0 && burst.opt.dst.prot == '0 && burst.opt.dst.qos == '0 && burst.opt.dst.region == '0; } - endclass - - typedef struct { - idma_req_t burst; - addr_t read_addresses[4]; - addr_t write_address; - logic [63:0] write_data; - logic did_irq; - } result_t; - result_t golden_queue[$]; - - - // clocks - logic clk; - logic rst_n; - - clk_rst_gen #( - .ClkPeriod(PERIOD), - .RstClkCycles(RESET_CYCLES) - ) i_clock_reset_generator ( - .clk_o (clk) , - .rst_no(rst_n) - ); - - // dut signals and module - REG_BUS #( - .ADDR_WIDTH(64), - .DATA_WIDTH(64) - ) i_reg_iface_bus (clk); - - reg_driver #( - .AW(64), - .DW(64), - .TA(APPL_DELAY), - .TT(ACQ_DELAY) - ) i_reg_iface_driver = new (i_reg_iface_bus); - - reg_rsp_t dma_master_response; - reg_req_t dma_master_request; - reg_rsp_t dma_slave_response; - reg_req_t dma_slave_request; - - idma_req_t dma_be_req; - - logic dma_be_tx_complete; - logic dma_be_idle; - logic dma_be_valid; - logic dma_be_ready; - logic irq; - - idma_desc64 #( - .AddrWidth (64), - .burst_req_t (idma_req_t), - .reg_rsp_t (reg_rsp_t), - .reg_req_t (reg_req_t) - ) 
i_dut ( - .clk_i (clk), - .rst_ni (rst_n), - .master_rsp_i (dma_master_response), - .master_req_o (dma_master_request), - .slave_req_i (dma_slave_request), - .slave_rsp_o (dma_slave_response), - .dma_be_tx_complete_i(dma_be_tx_complete), - .dma_be_idle_i (dma_be_idle), - .dma_be_valid_o (dma_be_valid), - .dma_be_ready_i (dma_be_ready), - .dma_be_req_o (dma_be_req), - .irq_o (irq) - ); - - assign dma_slave_request.addr = i_reg_iface_bus.addr; - assign dma_slave_request.write = i_reg_iface_bus.write; - assign dma_slave_request.wdata = i_reg_iface_bus.wdata; - assign dma_slave_request.wstrb = i_reg_iface_bus.wstrb; - assign dma_slave_request.valid = i_reg_iface_bus.valid; - assign i_reg_iface_bus.rdata = dma_slave_response.rdata; - assign i_reg_iface_bus.ready = dma_slave_response.ready; - assign i_reg_iface_bus.error = dma_slave_response.error; - - initial begin - dma_master_response = '0; - dma_be_tx_complete = '0; - dma_be_ready = '0; - end - - // queues for communication and data transfer - stimulus_t generated_stimuli[$][$]; - stimulus_t inflight_stimuli[$][$]; - logic inflight_be_tokens[$]; - result_t inflight_results_after_reads[$]; - result_t inflight_results_submitted_to_be[$]; - result_t result_queue[$]; - assign dma_be_idle = inflight_be_tokens.size() == 0; - - function automatic void generate_stimuli(); - repeat (NumberOfTests) begin - automatic stimulus_t current_stimulus; - automatic stimulus_t current_stimuli_group[$]; - automatic int number_of_descriptors_in_test; - - void'(std::randomize(number_of_descriptors_in_test) with { - number_of_descriptors_in_test >= MinChainedDescriptors; - number_of_descriptors_in_test <= MaxChainedDescriptors; - }); - - current_stimulus = new(); - if (!current_stimulus.randomize()) begin - $error("Couldn't randomize stimulus"); - end else begin - - current_stimuli_group.push_back(current_stimulus); - golden_queue.push_back('{ - burst: current_stimulus.burst, - read_addresses: '{ - // descriptor is four contiguous 64-bit 
words - current_stimulus.base, - current_stimulus.base + 8, - current_stimulus.base + 16, - current_stimulus.base + 24 - }, - write_address: current_stimulus.base, - write_data: ~64'b0, - did_irq: current_stimulus.do_irq - }); - end - - repeat (number_of_descriptors_in_test - 1) begin - current_stimulus = new(); - if (!current_stimulus.randomize()) begin - $error("Couldn't randomize stimulus"); - end else begin - // chain descriptor - current_stimuli_group[$].next = current_stimulus.base; - - current_stimuli_group.push_back(current_stimulus); - - golden_queue.push_back('{ - burst: current_stimulus.burst, - read_addresses: '{ - // descriptor is four contiguous 64-bit words - current_stimulus.base, - current_stimulus.base + 8, - current_stimulus.base + 16, - current_stimulus.base + 24 - }, - write_address: current_stimulus.base, - write_data: ~64'b0, - did_irq: current_stimulus.do_irq - }); - end - end - generated_stimuli.push_back(current_stimuli_group); - end - // make the last stimulus generate an irq to simplify the IRQ - // acquisition - generated_stimuli[$][$].do_irq = 1'b1; - golden_queue[$].did_irq = 1'b1; - endfunction : generate_stimuli - - task apply_stimuli(); - fork - regbus_slave_interaction(); - regbus_master_apply_reads_and_writes(); - backend_tx_done_notifier(); - backend_acceptor(); - join - endtask - - task collect_responses(); - fork - regbus_master_acquire_reads(); - regbus_master_acquire_writes_and_irqs(); - backend_submission_monitor(); - acquire_bursts(); - join - endtask - - // regbus slave interaction (we're acting as master) - task regbus_slave_interaction(); - automatic stimulus_t current_stimulus_group[$]; - i_reg_iface_driver.reset_master(); - @(posedge rst_n); - - forever begin - automatic logic [63:0] status; - automatic addr_t start_addr; - automatic logic error; - - wait (generated_stimuli.size() > '0); - - i_reg_iface_driver.send_read( - .addr (IDMA_DESC64_STATUS_OFFSET), - .data (status) , - .error(error) - ); - if ((status & 
64'b10) == 64'b0) begin - // the fifos are not full yet, so we can submit - current_stimulus_group = generated_stimuli.pop_front(); - - i_reg_iface_driver.send_write( - .addr (IDMA_DESC64_DESC_ADDR_OFFSET) , - .data (current_stimulus_group[0].base), - .strb (8'hff) , - .error(error) - ); - inflight_stimuli.push_back(current_stimulus_group); - end - end - endtask - - function automatic logic [63:0] stimulus_to_flag_bits(stimulus_t stim); - // Copied from frontend: - // bit 0 set to trigger an irq on completion, unset to not be notified - // bits 2:1 burst type for source, fixed: 00, incr: 01, wrap: 10 - // bits 4:3 burst type for destination, fixed: 00, incr: 01, wrap: 10 - // for a description of these modes, check AXI-Pulp documentation - // bit 5 set to decouple reads and writes in the backend - // bit 6 set to serialize requests. Not setting might violate AXI spec - // bit 7 set to deburst (each burst is split into own transfer) - // for a more thorough description, refer to the iDMA backend documentation - // bits 11:8 Bitfield for AXI cache attributes for the source - // bits 15:12 Bitfield for AXI cache attributes for the destination - // bits of the bitfield (refer to AXI-Pulp for a description): - // bit 0: cache bufferable - // bit 1: cache modifiable - // bit 2: cache read alloc - // bit 3: cache write alloc - // bits 23:16 AXI ID used for the transfer - // bits 31:26 unused/reserved - automatic logic [63:0] result = '0; - automatic logic [31:0] flags = '0; - - flags[0] = stim.do_irq; - flags[2:1] = stim.burst.opt.src.burst; - flags[4:3] = stim.burst.opt.dst.burst; - flags[5] = stim.burst.opt.beo.decouple_rw; - flags[6] = '0; - // flags[6] = stim.burst.opt.beo.serialize; - flags[7] = stim.burst.opt.beo.src_reduce_len; - flags[11:8] = stim.burst.opt.src.cache; - flags[15:12] = stim.burst.opt.dst.cache; - flags[23:16] = stim.burst.opt.axi_id; - flags[31:26] = '0; - - result[31:0] = stim.burst.length; - result[63:32] = flags; - return result; - endfunction - 
// regbus master interaction read and write application (we're acting as slave) - task regbus_master_apply_reads_and_writes(); - automatic stimulus_t current_stimulus_group[$]; - automatic stimulus_t current_stimulus; - automatic int read_index; - - @(posedge rst_n); - dma_master_response.ready = '0; - dma_master_response.rdata = '0; - dma_master_response.error = '0; - - wait (inflight_stimuli.size() > 0); - current_stimulus_group = inflight_stimuli.pop_front(); - current_stimulus = current_stimulus_group.pop_front(); - - forever begin - automatic addr_t read_addr; - automatic logic [63:0] read_result; - - @(posedge clk); - #(APPL_DELAY); - dma_master_response.ready = 1'b0; - - wait (dma_master_request.valid); - @(posedge clk) - #(APPL_DELAY); - if (!dma_master_request.write) begin - // we have read everything from this stimulus packet, go to the - // next one - if (read_index == 4) begin - // get the next transfer group if we are done with the current group - if (current_stimulus_group.size() == '0) begin - wait (inflight_stimuli.size() > '0); - current_stimulus_group = inflight_stimuli.pop_front(); - end - - current_stimulus = current_stimulus_group.pop_front(); - read_index = 0; - end - - case (read_index) - 0: begin : flags_and_length - dma_master_response.rdata = stimulus_to_flag_bits(current_stimulus); - end : flags_and_length - 1: begin : next - if (current_stimulus_group.size() == '0) begin - dma_master_response.rdata = ~64'b0; - end else begin - dma_master_response.rdata = current_stimulus_group[0].base; - end - end : next - 2: begin : src - dma_master_response.rdata = current_stimulus.burst.src_addr; - end : src - 3: begin : dst - dma_master_response.rdata = current_stimulus.burst.dst_addr; - end : dst - default: begin - $error("The regbus master block reached an inconsistent state (%d)", read_index); - end - endcase - ++read_index; - end - dma_master_response.ready = 1'b1; - end - endtask - - task regbus_master_acquire_reads(); - automatic int read_index 
= '0; - automatic result_t current_result; - @(posedge rst_n); - forever begin - // wait for a read request - forever begin - @(posedge clk); - #(ACQ_DELAY); - if (dma_master_request.valid && - dma_master_response.ready && - !dma_master_request.write) break; - end - current_result.read_addresses[read_index] = dma_master_request.addr; - read_index++; - if (read_index == 4) begin - read_index = 0; - inflight_results_after_reads.push_back(current_result); - end - end - endtask - - task regbus_master_acquire_writes_and_irqs(); - // set to one to skip first submission of what would be an invalid result - automatic bit captured_irq = '1; - automatic result_t current_result; - @(posedge rst_n); - wait (inflight_results_submitted_to_be.size() > 0); - current_result = inflight_results_submitted_to_be.pop_front(); - forever begin - forever begin - @(posedge clk); - #(ACQ_DELAY); - if ((dma_master_request.valid && - dma_master_response.ready && - dma_master_request.write) || - irq) break; - end - if (irq) begin - if (captured_irq) begin - $error("Got a duplicate IRQ!"); - end else begin - current_result.did_irq = irq; - captured_irq = 1'b1; - result_queue.push_back(current_result); - wait (inflight_results_submitted_to_be.size() > 0); - current_result = inflight_results_submitted_to_be.pop_front(); - end - end else begin - // if we haven't captured an irq, we are still with the last - // result, which we now need to submit and get the next one - if (!captured_irq) begin - current_result.did_irq = 0; - result_queue.push_back(current_result); - wait (inflight_results_submitted_to_be.size() > 0); - current_result = inflight_results_submitted_to_be.pop_front(); - end - current_result.write_address = dma_master_request.addr; - current_result.write_data = dma_master_request.wdata; - captured_irq = 1'b0; - end - end - endtask - - task backend_submission_monitor(); - @(posedge rst_n); - forever begin - forever begin - @(posedge clk); - #(ACQ_DELAY); - if (dma_be_valid && 
dma_be_ready) break; - end - // annotate that a job has entered the backend - inflight_be_tokens.push_back(1'b1); - end - endtask - - task backend_tx_done_notifier(); - @(posedge rst_n); - forever begin - wait (inflight_be_tokens.size() > 0); - - // remove token, as we handled the request - void'(inflight_be_tokens.pop_front()); - - rand_wait(5, 20, clk); - - #(APPL_DELAY); - dma_be_tx_complete = 1'b1; - - @(posedge clk); - #(APPL_DELAY); - dma_be_tx_complete = 1'b0; - end - endtask - - task acquire_bursts(); - automatic result_t current_result; - automatic idma_req_t current_burst; - @(posedge rst_n); - forever begin - forever begin - @(posedge clk); - #(ACQ_DELAY); - if (dma_be_valid && dma_be_ready) break; - end - current_burst = dma_be_req; - wait (inflight_results_after_reads.size() > 0); - current_result = inflight_results_after_reads.pop_front(); - current_result.burst = current_burst; - inflight_results_submitted_to_be.push_back(current_result); - end - endtask - - task backend_acceptor(); - automatic result_t current_result; - @(posedge rst_n); - forever begin - wait (dma_be_valid); - @(posedge clk); - #(APPL_DELAY) - dma_be_ready = 1'b1; - @(posedge clk); - #(APPL_DELAY) - dma_be_ready = 1'b0; - end - endtask - - // score the results - initial begin : proc_scoring - static logic finished_simulation = '0; - - static int number_of_descriptors = 0; - static int read_errors = 0; - static int write_addr_errors = 0; - static int write_data_errors = 0; - static int burst_errors = 0; - static int irq_errors = 0; - - generate_stimuli(); - - fork - apply_stimuli(); - collect_responses(); - begin : watchdog - @(posedge rst_n); - repeat (SimulationTimeoutCycles) begin - @(posedge clk); - end - end : watchdog - begin : scorer - @(posedge rst_n); - - while (golden_queue.size() > '0) begin - automatic result_t golden; - automatic result_t actual; - wait (result_queue.size() > 0); - golden = golden_queue.pop_front(); - actual = result_queue.pop_front(); - if 
(golden.burst !== actual.burst) begin - $error("Burst mismatch @ %d:\ngolden: %p\nactual: %p", - number_of_descriptors, golden.burst, actual.burst); - ++burst_errors; - end - foreach (golden.read_addresses[i]) begin - if (golden.read_addresses[i] !== actual.read_addresses[i]) begin - $error("Read address mismatch @ %d:\ngolden: %x\nactual: %x", - number_of_descriptors, golden.read_addresses[i], actual.read_addresses[i]); - ++read_errors; - end - end - if (golden.write_address !== actual.write_address) begin - $error("Write address mismatch @ %d:\ngolden: %x\nactual: %x", - number_of_descriptors, golden.write_address, actual.write_address); - ++write_addr_errors; - end - if (golden.write_data !== actual.write_data) begin - $error("Write data mismatch @ %d:\ngolden: %x\nactual: %x", - number_of_descriptors, golden.write_data, actual.write_data); - ++write_data_errors; - end - if (golden.did_irq !== actual.did_irq) begin - $error("IRQ mismatch @ %d:\ngolden: %x\nactual: %x", - number_of_descriptors, golden.did_irq, actual.did_irq); - ++irq_errors; - end - ++number_of_descriptors; - end - // wait for frontend to signal no longer busy - forever begin - automatic logic [63:0] status; - automatic logic error; - i_reg_iface_driver.send_read( - .addr(IDMA_DESC64_STATUS_OFFSET), - .data(status), - .error(error) - ); - if (status[0] != 1'b1) break; - end - finished_simulation = 1; - end : scorer - join_any - disable fork; - if (!finished_simulation) begin - $error("Simulation timed out."); - end else begin - $display("Simulation finished in a timely manner."); - end - $display("Read address errors: %d", read_errors); - $display("Write address errors: %d", write_addr_errors); - $display("Write data errors: %d", write_data_errors); - $display("Burst errors: %d", burst_errors); - $display("IRQ errors: %d", irq_errors); - $stop(); - $finish(); - end : proc_scoring - -endmodule : tb_idma_desc64 diff --git a/test/frontend/tb_idma_desc64_bench.sv 
b/test/frontend/tb_idma_desc64_bench.sv new file mode 100644 index 00000000..043e79b3 --- /dev/null +++ b/test/frontend/tb_idma_desc64_bench.sv @@ -0,0 +1,966 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Axel Vanoni + +`include "register_interface/typedef.svh" +`include "register_interface/assign.svh" +`include "idma/tracer.svh" +`include "idma/typedef.svh" +`include "axi/typedef.svh" +`include "axi/assign.svh" + + + +/// Benchmarking TB for the descriptor-based frontend +module tb_idma_desc64_bench + import idma_desc64_reg_pkg::IDMA_DESC64_DESC_ADDR_OFFSET; + import idma_desc64_reg_pkg::IDMA_DESC64_STATUS_OFFSET; + import rand_verif_pkg::rand_wait; + import axi_pkg::*; + import reg_test::reg_driver; #( + parameter integer NumberOfTests = 100, + parameter integer SimulationTimeoutCycles = 1000000, + parameter integer ChainedDescriptors = 10, + parameter integer TransferLength = 1024, + parameter integer AlignmentMask = 'h0f, + parameter integer NumContiguous = 200000, + parameter integer MaxAxInFlight = 64, + parameter bit DoIRQ = 1, + parameter integer TransfersToSkip = 4, + // from frontend + parameter int unsigned InputFifoDepth = 8, + parameter int unsigned PendingFifoDepth = 8, + parameter int unsigned NSpeculation = 4, + // from backend tb + parameter int unsigned BufferDepth = 3, + parameter int unsigned NumAxInFlight = NSpeculation > 3 ? 
NSpeculation : 3, + parameter int unsigned TFLenWidth = 32, + parameter int unsigned MemSysDepth = 0, + parameter int unsigned MemNumReqOutst = 1, + parameter int unsigned MemLatency = 0, + parameter int unsigned WatchDogNumCycles = 100, + parameter bit MaskInvalidData = 1, + parameter bit RAWCouplingAvail = 1, + parameter bit HardwareLegalizer = 1, + parameter bit RejectZeroTransfers = 1, + parameter bit ErrorHandling = 1, + parameter bit IdealMemory = 1, + parameter int unsigned Seed = 1337 +) (); + localparam time PERIOD = 10ns; + localparam time APPL_DELAY = PERIOD / 4; + localparam time ACQ_DELAY = PERIOD * 3 / 4; + + localparam integer RESET_CYCLES = 10; + + localparam integer DataWidth = 64; + localparam integer AddrWidth = 64; + localparam integer UserWidth = 1; + localparam integer AxiIdWidth = 3; + + typedef logic [63:0] addr_t; + typedef logic [ 2:0] axi_id_t; + typedef logic [ 3:0] mem_axi_id_t; + typedef axi_test::axi_ax_beat #(.AW(64), .IW(3), .UW(1)) ax_beat_t; + typedef axi_test::axi_r_beat #(.DW(64), .IW(3), .UW(1)) r_beat_t; + typedef axi_test::axi_w_beat #(.DW(64), .UW(1)) w_beat_t; + typedef axi_test::axi_b_beat #(.IW(3), .UW(1)) b_beat_t; + + `REG_BUS_TYPEDEF_ALL(reg, /* addr */ addr_t, /* data */ logic [63:0], /* strobe */ logic [7:0]) + `AXI_TYPEDEF_ALL(axi, /* addr */ addr_t, /* id */ axi_id_t, /* data */ logic [63:0], /* strb */ logic [7:0], /* user */ logic [0:0]) + `AXI_TYPEDEF_ALL(mem_axi, /* addr */ addr_t, /* id */ mem_axi_id_t, /* data */ logic [63:0], /* strb */ logic [7:0], /* user */ logic [0:0]) + + // iDMA struct definitions + typedef logic [TFLenWidth-1:0] tf_len_t; + + // iDMA request / response types + `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, axi_id_t, addr_t, tf_len_t) + `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) + + typedef struct packed { + axi_ar_chan_t ar_chan; + } axi_read_meta_channel_t; + + typedef struct packed { + axi_read_meta_channel_t axi; + } read_meta_channel_t; + + typedef struct packed { + axi_aw_chan_t aw_chan; 
+    } axi_write_meta_channel_t;
+
+    typedef struct packed {
+        axi_write_meta_channel_t axi;
+    } write_meta_channel_t;
+
+    // set seed; NOTE(review): $urandom(seed) seeds the RNG of the calling process only - confirm the stimulus generation actually depends on it
+    initial begin
+        $urandom(Seed);
+    end
+
+    class stimulus_t; // one randomized descriptor: its own address (base), the backend request (burst) and the IRQ flag
+        rand addr_t     base;
+        rand idma_req_t burst;
+        rand logic      do_irq;
+        addr_t          next = 64'hffff_ffff_ffff_ffff; // not randomized; all-ones default (presumably the end-of-chain marker - confirm)
+
+        // an entire descriptor of 4 words must fit before the end of memory
+        constraint descriptor_fits_in_memory { (64'hffff_ffff_ffff_ffff - base) > 64'd32; }
+        constraint descriptor_is_in_descriptor_area { base > 64'h0000_ffff_ffff_ffff; } // descriptors live above 0x0000_ffff_ffff_ffff
+        constraint descriptor_is_aligned { (base & 64'hf) == 0; } // low four address bits are zero
+        constraint no_empty_transfers { burst.length > '0; }
+        constraint src_fits_in_memory { 64'hffff_ffff_ffff_ffff - burst.src_addr > burst.length; }
+        constraint dst_fits_in_memory { 64'hffff_ffff_ffff_ffff - burst.dst_addr > burst.length; }
+        constraint src_is_not_in_descriptor_area { 64'h0000_ffff_ffff_ffff > (burst.src_addr + burst.length); } // payload ends below the descriptor area
+        constraint dst_is_not_in_descriptor_area { 64'h0000_ffff_ffff_ffff > (burst.dst_addr + burst.length); }
+        constraint src_aligned { (burst.src_addr & AlignmentMask) == 64'b0; } // alignment is selected by the AlignmentMask parameter
+        constraint dst_aligned { (burst.dst_addr & AlignmentMask) == 64'b0; }
+        constraint src_burst_valid { burst.opt.src.burst inside { BURST_INCR }; } // INCR bursts only
+        constraint dst_burst_valid { burst.opt.dst.burst inside { BURST_INCR }; }
+        constraint reduce_len_equal { burst.opt.beo.src_reduce_len == burst.opt.beo.dst_reduce_len; }
+        constraint reduce_len_zero { burst.opt.beo.src_reduce_len == 1'b0; }
+        constraint beo_zero { burst.opt.beo.decouple_aw == '0 && burst.opt.beo.src_max_llen == '0 && burst.opt.beo.dst_max_llen == '0 && burst.opt.last == '0 && burst.opt.beo.decouple_rw == '0; }
+        constraint axi_params_zero_src { burst.opt.src.lock == '0 && burst.opt.src.prot == '0 && burst.opt.src.qos == '0 && burst.opt.src.region == '0; }
+        constraint axi_params_zero_dst { burst.opt.dst.lock == '0 && burst.opt.dst.prot == '0 && burst.opt.dst.qos == '0 && burst.opt.dst.region == '0; }
+        constraint axi_src_cache_zero { burst.opt.src.cache == '0; }
+        constraint axi_dst_cache_zero { burst.opt.dst.cache == '0; }
+        constraint transfer_length { burst.length == TransferLength; } // length is pinned to the TransferLength parameter
+        constraint irq { do_irq == DoIRQ; } // IRQ flag is pinned to the DoIRQ parameter
+    endclass
+
+    typedef struct { // per-descriptor record of request, AXI read/write attributes and IRQ (how it is filled lies outside this view)
+        idma_req_t   burst;
+        addr_t       read_address;
+        logic [7:0]  read_length;
+        logic [2:0]  read_size;
+        addr_t       write_address;
+        logic [7:0]  write_length;
+        logic [2:0]  write_size;
+        logic [63:0] write_data;
+        logic        did_irq;
+    } result_t;
+    result_t golden_queue[$];
+
+    // clocks
+    logic clk;
+    logic rst_n;
+
+    clk_rst_gen #(
+        .ClkPeriod(PERIOD),
+        .RstClkCycles(RESET_CYCLES)
+    ) i_clock_reset_generator (
+        .clk_o (clk) ,
+        .rst_no(rst_n)
+    );
+
+    // dut signals and module
+    REG_BUS #(
+        .ADDR_WIDTH(64),
+        .DATA_WIDTH(64)
+    ) i_reg_iface_bus (clk);
+
+    reg_driver #(
+        .AW(64),
+        .DW(64),
+        .TA(APPL_DELAY),
+        .TT(ACQ_DELAY)
+    ) i_reg_iface_driver = new (i_reg_iface_bus);
+
+    axi_resp_t dma_fe_master_response;
+    axi_req_t dma_fe_master_request;
+    axi_resp_t dma_be_cut_resp;
+    axi_req_t dma_be_cut_req;
+    axi_resp_t dma_be_master_response, axi_read_rsp, axi_write_rsp;
+    axi_req_t dma_be_master_request, axi_read_req, axi_write_req;
+    mem_axi_resp_t axi_mem_response;
+    mem_axi_req_t axi_mem_request;
+    mem_axi_resp_t axi_throttle_rsp;
+    mem_axi_req_t axi_throttle_req;
+    mem_axi_resp_t axi_multicut_rsp;
+    mem_axi_req_t axi_multicut_req;
+
+    AXI_BUS_DV #(
+        .AXI_ADDR_WIDTH(64),
+        .AXI_DATA_WIDTH(64),
+        .AXI_ID_WIDTH(3),
+        .AXI_USER_WIDTH(1)
+    ) i_axi_be_bus (clk);
+
+    AXI_BUS_DV #(
+        .AXI_ADDR_WIDTH(64),
+        .AXI_DATA_WIDTH(64),
+        .AXI_ID_WIDTH(3),
+        .AXI_USER_WIDTH(1)
+    ) i_axi_iface_bus (clk);
+
+    axi_test::axi_driver #(
+        .AW(64),
+        .DW(64),
+        .IW(3),
+        .UW(1),
+        .TA(APPL_DELAY),
+        .TT(ACQ_DELAY)
+    ) i_axi_iface_driver = new (i_axi_iface_bus);
+
+    reg_rsp_t dma_slave_response;
+    reg_req_t dma_slave_request;
+
+    idma_pkg::idma_busy_t busy;
+    idma_req_t dma_be_req;
+    idma_rsp_t dma_be_rsp;
+
+    logic dma_be_req_valid;
+    logic dma_be_req_ready;
+    logic dma_be_rsp_valid;
+
logic dma_be_req_ready; + logic dma_be_rsp_valid; + logic dma_be_rsp_ready; + logic irq; + + idma_desc64_top #( + .AddrWidth (64), + .DataWidth (64), + .AxiIdWidth (3), + .idma_req_t (idma_req_t), + .idma_rsp_t (idma_rsp_t), + .axi_rsp_t (axi_resp_t), + .axi_req_t (axi_req_t), + .axi_ar_chan_t (axi_ar_chan_t), + .axi_r_chan_t (axi_r_chan_t), + .reg_rsp_t (reg_rsp_t), + .reg_req_t (reg_req_t), + .InputFifoDepth (InputFifoDepth), + .PendingFifoDepth(PendingFifoDepth), + .BackendDepth (NumAxInFlight + BufferDepth), + .NSpeculation (NSpeculation) + ) i_dut ( + .clk_i (clk), + .rst_ni (rst_n), + .master_req_o (dma_fe_master_request), + .master_rsp_i (dma_fe_master_response), + .axi_ar_id_i (3'b111), + .axi_aw_id_i (3'b111), + .slave_req_i (dma_slave_request), + .slave_rsp_o (dma_slave_response), + .idma_req_o (dma_be_req), + .idma_req_valid_o(dma_be_req_valid), + .idma_req_ready_i(dma_be_req_ready), + .idma_rsp_i ('0), + .idma_rsp_valid_i(dma_be_rsp_valid), + .idma_rsp_ready_o(dma_be_rsp_ready), + .idma_busy_i (|busy), + .irq_o (irq) + ); + + idma_backend_rw_axi #( + .DataWidth ( DataWidth ), + .AddrWidth ( AddrWidth ), + .AxiIdWidth ( AxiIdWidth ), + .UserWidth ( UserWidth ), + .TFLenWidth ( TFLenWidth ), + .MaskInvalidData ( MaskInvalidData ), + .BufferDepth ( BufferDepth ), + .RAWCouplingAvail ( RAWCouplingAvail ), + .HardwareLegalizer ( HardwareLegalizer ), + .RejectZeroTransfers ( RejectZeroTransfers ), + .ErrorCap ( idma_pkg::NO_ERROR_HANDLING ), + .CombinedShifter ( 1'b0 ), + .PrintFifoInfo ( 1'b0 ), + .NumAxInFlight ( NumAxInFlight ), + .MemSysDepth ( 32'd0 ), + .idma_req_t ( idma_req_t ), + .idma_rsp_t ( idma_rsp_t ), + .idma_eh_req_t ( idma_pkg::idma_eh_req_t ), + .idma_busy_t ( idma_pkg::idma_busy_t ), + .axi_req_t ( axi_req_t ), + .axi_rsp_t ( axi_resp_t ), + .write_meta_channel_t ( write_meta_channel_t ), + .read_meta_channel_t ( read_meta_channel_t ) + ) i_idma_backend ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .testmode_i ( 1'b0 ), + .idma_req_i ( 
dma_be_req ), + .req_valid_i ( dma_be_req_valid ), + .req_ready_o ( dma_be_req_ready ), + .idma_rsp_o ( dma_be_rsp ), + .rsp_valid_o ( dma_be_rsp_valid ), + .rsp_ready_i ( dma_be_rsp_ready ), + .idma_eh_req_i ( '0 ), + .eh_req_valid_i ( '1 ), + .eh_req_ready_o ( /* unconnected */ ), + .axi_read_req_o ( axi_read_req ), + .axi_read_rsp_i ( axi_read_rsp ), + .axi_write_req_o ( axi_write_req ), + .axi_write_rsp_i ( axi_write_rsp ), + .busy_o ( busy ) + ); + + // Read Write Join + axi_rw_join #( + .axi_req_t ( axi_req_t ), + .axi_resp_t ( axi_resp_t ) + ) i_axi_rw_join ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .slv_read_req_i ( axi_read_req ), + .slv_read_resp_o ( axi_read_rsp ), + .slv_write_req_i ( axi_write_req ), + .slv_write_resp_o ( axi_write_rsp ), + .mst_req_o ( dma_be_master_request ), + .mst_resp_i ( dma_be_master_response ) + ); + + string trace_file; + initial begin + void'($value$plusargs("trace_file=%s", trace_file)); + end + `ifndef SYNTHESYS + `ifndef VERILATOR + initial begin : inital_tracer + automatic bit first_iter = 1'b1; + automatic int unsigned skipped_transfers = 0; + automatic int unsigned recorded_transfers = 0; + automatic integer tf; + automatic `IDMA_TRACER_MAX_TYPE cnst [string]; + automatic `IDMA_TRACER_MAX_TYPE meta [string]; + automatic `IDMA_TRACER_MAX_TYPE busy [string]; + automatic `IDMA_TRACER_MAX_TYPE axib [string]; + automatic string trace; + #0; + tf = $fopen(trace_file, "w"); + $display("[Tracer] Logging iDMA backend %s to %s", "i_idma_backend", trace_file); + forever begin + @(posedge i_idma_backend.clk_i); + if (i_idma_backend.rst_ni & irq) begin + skipped_transfers += 1; + if (skipped_transfers > TransfersToSkip) begin + break; + end + end + end + forever begin + @(posedge i_idma_backend.clk_i); + if (irq) begin + recorded_transfers += 1; + if (recorded_transfers >= TransfersToSkip / 2) begin + break; + end + end + /* Trace */ + trace = "{"; + /* Constants */ + cnst = '{ + "inst" : "i_idma_backend", + "data_width" : 
i_idma_backend.DataWidth, + "addr_width" : i_idma_backend.AddrWidth, + "user_width" : i_idma_backend.UserWidth, + "axi_id_width" : i_idma_backend.AxiIdWidth, + "num_ax_in_flight" : i_idma_backend.NumAxInFlight, + "buffer_depth" : i_idma_backend.BufferDepth, + "tf_len_width" : i_idma_backend.TFLenWidth, + "mem_sys_depth" : i_idma_backend.MemSysDepth, + "rw_coupling_avail" : i_idma_backend.RAWCouplingAvail, + "mask_invalid_data" : i_idma_backend.MaskInvalidData, + "hardware_legalizer" : i_idma_backend.HardwareLegalizer, + "reject_zero_transfers" : i_idma_backend.RejectZeroTransfers, + "error_cap" : i_idma_backend.ErrorCap, + "print_fifo_info" : i_idma_backend.PrintFifoInfo + }; + meta = '{ + "time" : $time() + }; + busy = '{ + "buffer" : i_idma_backend.busy_o.buffer_busy, + "r_dp" : i_idma_backend.busy_o.r_dp_busy, + "w_dp" : i_idma_backend.busy_o.w_dp_busy, + "r_leg" : i_idma_backend.busy_o.r_leg_busy, + "w_leg" : i_idma_backend.busy_o.w_leg_busy, + "eh_fsm" : i_idma_backend.busy_o.eh_fsm_busy, + "eh_cnt" : i_idma_backend.busy_o.eh_cnt_busy, + "raw_coupler" : i_idma_backend.busy_o.raw_coupler_busy + }; + axib = '{ + "w_valid" : i_idma_backend.axi_write_req_o.w_valid, + "w_ready" : i_idma_backend.axi_write_rsp_i.w_ready, + "w_strb" : i_idma_backend.axi_write_req_o.w.strb, + "r_valid" : i_idma_backend.axi_read_rsp_i.r_valid, + "r_ready" : i_idma_backend.axi_read_req_o.r_ready + }; + if ($isunknown(axib["w_ready"]) || $isunknown(axib["r_valid"])) begin + $fatal(1, "UNKNOWN AXI STATE, THIS SHOULD NEVER HAPPEN!"); /* first $fatal argument is the finish_number, the message follows it */ + end + /* Assembly: append each key/value map to the trace string; constants are gated by first_iter */ + `IDMA_TRACER_STR_ASSEMBLY(cnst, first_iter); + `IDMA_TRACER_STR_ASSEMBLY(meta, 1); + `IDMA_TRACER_STR_ASSEMBLY(busy, 1); + `IDMA_TRACER_STR_ASSEMBLY(axib, 1); + `IDMA_TRACER_CLEAR_COND(first_iter); + /* Commit: write one trace line per sampled clock edge to the trace file */ + $fwrite(tf, $sformatf("%s}\n", trace)); + end + end +`endif +`endif + + /* + axi_cut #( + .aw_chan_t (axi_aw_chan_t), + .w_chan_t (axi_w_chan_t), + .b_chan_t (axi_b_chan_t), + .ar_chan_t (axi_ar_chan_t), + .r_chan_t 
(axi_r_chan_t), + .axi_req_t (axi_req_t), + .axi_resp_t(axi_resp_t) + ) i_axi_cut ( + .clk_i (clk), + .rst_ni (rst_n), + .slv_req_i (dma_be_cut_req), + .slv_resp_o (dma_be_cut_resp), + .mst_req_o (dma_be_master_request), + .mst_resp_i (dma_be_master_response) + ); + */ + + // AXI mux: joins the backend and frontend AXI masters onto the single memory-side port + axi_mux #( + .SlvAxiIDWidth (3), + .slv_aw_chan_t (axi_aw_chan_t), + .mst_aw_chan_t (mem_axi_aw_chan_t), + .w_chan_t (axi_w_chan_t), + .slv_b_chan_t (axi_b_chan_t), + .mst_b_chan_t (mem_axi_b_chan_t), + .slv_ar_chan_t (axi_ar_chan_t), + .mst_ar_chan_t (mem_axi_ar_chan_t), + .slv_r_chan_t (axi_r_chan_t), + .mst_r_chan_t (mem_axi_r_chan_t), + .slv_req_t (axi_req_t), + .slv_resp_t (axi_resp_t), + .mst_req_t (mem_axi_req_t), + .mst_resp_t (mem_axi_resp_t), + .NoSlvPorts (2), + .MaxWTrans (MaxAxInFlight), + .FallThrough (1'b0), + .SpillAw (1'b0), + .SpillW (1'b0), + .SpillB (1'b0), + .SpillAr (1'b0), + .SpillR (1'b0) + ) i_mux ( + .clk_i (clk), + .rst_ni (rst_n), + .test_i (1'b0), + .slv_reqs_i ({dma_be_master_request, dma_fe_master_request}), + .slv_resps_o({dma_be_master_response, dma_fe_master_response}), + .mst_req_o (axi_throttle_req), + .mst_resp_i (axi_throttle_rsp) + ); + + // sim memory (driven through the throttle and multicut stages) + axi_sim_mem #( + .AddrWidth ( AddrWidth ), + .DataWidth ( DataWidth ), + .IdWidth (AxiIdWidth + 1), + .UserWidth ( UserWidth ), + .axi_req_t (mem_axi_req_t ), + .axi_rsp_t (mem_axi_resp_t), + .WarnUninitialized ( 1'b0 ), + .ClearErrOnAccess ( 1'b1 ), + .ApplDelay ( APPL_DELAY ), + .AcqDelay ( ACQ_DELAY ) + ) i_axi_sim_mem ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .axi_req_i ( axi_mem_request ), + .axi_rsp_o ( axi_mem_response ) + ); + + // limit pending AWs and ARs: both maxima and both credits are set to MaxAxInFlight + axi_throttle #( + .MaxNumAwPending(MaxAxInFlight), + .MaxNumArPending(MaxAxInFlight), + .axi_req_t(mem_axi_req_t), + .axi_rsp_t(mem_axi_resp_t) + ) i_axi_throttle ( + .clk_i (clk), + .rst_ni(rst_n), + .req_i(axi_throttle_req), + .rsp_o(axi_throttle_rsp), + .req_o(axi_multicut_req), + .rsp_i(axi_multicut_rsp), + .w_credit_i 
(MaxAxInFlight), + .r_credit_i (MaxAxInFlight) + ); + + // delay the signals using AXI4 multicuts + axi_multicut #( + .NoCuts ( MemLatency ), + .aw_chan_t ( mem_axi_aw_chan_t ), + .w_chan_t ( mem_axi_w_chan_t ), + .b_chan_t ( mem_axi_b_chan_t ), + .ar_chan_t ( mem_axi_ar_chan_t ), + .r_chan_t ( mem_axi_r_chan_t ), + .axi_req_t ( mem_axi_req_t ), + .axi_resp_t ( mem_axi_resp_t ) + ) i_axi_multicut ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .slv_req_i ( axi_multicut_req ), + .slv_resp_o ( axi_multicut_rsp ), + .mst_req_o ( axi_mem_request ), + .mst_resp_i ( axi_mem_response ) + ); + + `REG_BUS_ASSIGN_TO_REQ(dma_slave_request, i_reg_iface_bus); + `REG_BUS_ASSIGN_FROM_RSP(i_reg_iface_bus, dma_slave_response); + + `AXI_ASSIGN_FROM_REQ(i_axi_iface_bus, dma_fe_master_request); + `AXI_ASSIGN_FROM_RESP(i_axi_iface_bus, dma_fe_master_response); + + `AXI_ASSIGN_FROM_REQ(i_axi_be_bus, dma_be_master_request); + `AXI_ASSIGN_FROM_RESP(i_axi_be_bus, dma_be_master_response); + + initial begin + i_axi_iface_driver.reset_slave(); + end + + // queues for communication and data transfer + stimulus_t generated_stimuli[$][$]; + result_t ar_seen_result[$]; + result_t inflight_results_after_reads[$]; + result_t inflight_results_submitted_to_be[$]; + result_t aw_seen_result[$]; + result_t w_seen_result[$]; + result_t result_queue[$]; + + function automatic void generate_stimuli(); + automatic addr_t base_current = 64'h0001_0000_0000_0000; + automatic int contiguous = 0; + repeat (NumberOfTests) begin + automatic stimulus_t current_stimulus; + automatic stimulus_t current_stimuli_group[$]; + automatic int number_of_descriptors_in_test; + + number_of_descriptors_in_test = ChainedDescriptors; + + current_stimulus = new(); + if (!current_stimulus.randomize()) begin + $error("Couldn't randomize stimulus"); + end else begin + + // overwrite protocols + current_stimulus.burst.opt.src_protocol = idma_pkg::AXI; + current_stimulus.burst.opt.dst_protocol = idma_pkg::AXI; + + current_stimulus.base = 
base_current; + current_stimuli_group.push_back(current_stimulus); + contiguous += 1; + golden_queue.push_back('{ + burst: current_stimulus.burst, + + read_address: current_stimulus.base, + // axi length 3 is 4 transfers (+1) + read_length: 'd3, + // 2^3 = 8 bytes in a transfer + read_size: 'b011, + + write_address: current_stimulus.base, + // axi length 0 is 1 transfer (+1) + write_length: 8'b0, + // 2^3 = 8 bytes in a transfer + write_size: 3'b011, + write_data: 64'hffff_ffff_ffff_ffff, + + did_irq: current_stimulus.do_irq + }); + if (contiguous != NumContiguous) begin + base_current += 'd32; + end else begin + // make sure all invalid prefetches grab Xs from memory + base_current += 'h1000; + contiguous = '0; + end + end + + repeat (number_of_descriptors_in_test - 1) begin + current_stimulus = new(); + if (!current_stimulus.randomize()) begin + $error("Couldn't randomize stimulus"); + end else begin + + // overwrite protocols + current_stimulus.burst.opt.src_protocol = idma_pkg::AXI; + current_stimulus.burst.opt.dst_protocol = idma_pkg::AXI; + + current_stimulus.base = base_current; + contiguous += 1; + + // chain descriptor + current_stimuli_group[$].next = current_stimulus.base; + + current_stimuli_group.push_back(current_stimulus); + + golden_queue.push_back('{ + burst: current_stimulus.burst, + + read_address: current_stimulus.base, + // axi length 3 is 4 transfers (+1) + read_length: 'd3, + // 2^3 = 8 bytes in a transfer + read_size: 'b011, + + write_address: current_stimulus.base, + // axi length 0 is 1 transfer (+1) + write_length: 8'b0, + // 2^3 = 8 bytes in a transfer + write_size: 3'b011, + write_data: 64'hffff_ffff_ffff_ffff, + + did_irq: current_stimulus.do_irq + }); + end + if (contiguous != NumContiguous) begin + base_current += 'd32; + end else begin + // make sure all invalid prefetches grab Xs from memory + base_current += 'h1000; + contiguous = '0; + end + end + generated_stimuli.push_back(current_stimuli_group); + end + // make the last 
stimulus generate an irq to simplify the IRQ + // acquisition + // NOTE: with few requests this might impact statistics of the no-IRQ + // case + generated_stimuli[$][$].do_irq = 1'b1; + golden_queue[$].did_irq = 1'b1; + endfunction : generate_stimuli + + function automatic void write_mem_64(addr_t base, logic[63:0] data); /* store data byte-wise into the sim memory, least significant byte at base */ + i_axi_sim_mem.mem[base] = data[ 7: 0]; + i_axi_sim_mem.mem[base + 1] = data[15: 8]; + i_axi_sim_mem.mem[base + 2] = data[23:16]; + i_axi_sim_mem.mem[base + 3] = data[31:24]; + i_axi_sim_mem.mem[base + 4] = data[39:32]; + i_axi_sim_mem.mem[base + 5] = data[47:40]; + i_axi_sim_mem.mem[base + 6] = data[55:48]; + i_axi_sim_mem.mem[base + 7] = data[63:56]; + endfunction : write_mem_64 + + function automatic void load_descriptors_into_memory(); /* write every generated descriptor as four 64-bit words: +0x00 flags/length, +0x08 next, +0x10 src, +0x18 dst */ + $display("Loading descriptors"); + foreach (generated_stimuli[i]) begin + foreach (generated_stimuli[i][j]) begin + automatic addr_t base = generated_stimuli[i][j].base; + write_mem_64(base, stimulus_to_flag_bits(generated_stimuli[i][j])); + if (j == (generated_stimuli[i].size() - 1)) begin /* last descriptor of a chain: next pointer is all ones */ + write_mem_64(base + 64'h8, 64'hffff_ffff_ffff_ffff); + end else begin /* otherwise link to the base of the following descriptor in the group */ + write_mem_64(base + 64'h8, generated_stimuli[i][j+1].base); + end + write_mem_64(base + 64'h10, generated_stimuli[i][j].burst.src_addr); + write_mem_64(base + 64'h18, generated_stimuli[i][j].burst.dst_addr); + end + end + endfunction : load_descriptors_into_memory + + task apply_stimuli(); /* runs the register-bus stimulus process; does not return because that process loops forever */ + fork + regbus_slave_interaction(); + join + endtask + + task collect_responses(); /* runs all monitor processes in parallel; each one loops forever */ + fork + axi_master_acquire_ars(); + axi_master_acquire_rs(); + axi_master_acquire_aw(); + axi_master_acquire_w(); + axi_master_acquire_irqs(); + acquire_bursts(); + join + endtask + + // regbus slave interaction (we're acting as master) + task regbus_slave_interaction(); + automatic stimulus_t current_stimulus_group[$]; + i_reg_iface_driver.reset_master(); + @(posedge rst_n); + + forever begin + automatic logic [63:0] status; + automatic addr_t start_addr; + automatic logic error; 
+ + wait (generated_stimuli.size() > '0); + current_stimulus_group = generated_stimuli.pop_front(); + + i_reg_iface_driver.send_write( + .addr (IDMA_DESC64_DESC_ADDR_OFFSET) , + .data (current_stimulus_group[0].base), + .strb (8'hff) , + .error(error) + ); + end + endtask + + function automatic logic [63:0] stimulus_to_flag_bits(stimulus_t stim); + // Copied from frontend: + // bit 0 set to trigger an irq on completion, unset to not be notified + // bits 2:1 burst type for source, fixed: 00, incr: 01, wrap: 10 + // bits 4:3 burst type for destination, fixed: 00, incr: 01, wrap: 10 + // for a description of these modes, check AXI-Pulp documentation + // bit 5 set to decouple reads and writes in the backend + // bit 6 set to serialize requests. Not setting might violate AXI spec + // bit 7 set to deburst (each burst is split into own transfer) + // for a more thorough description, refer to the iDMA backend documentation + // bits 11:8 Bitfield for AXI cache attributes for the source + // bits 15:12 Bitfield for AXI cache attributes for the destination + // bits of the bitfield (refer to AXI-Pulp for a description): + // bit 0: cache bufferable + // bit 1: cache modifiable + // bit 2: cache read alloc + // bit 3: cache write alloc + // bits 23:16 AXI ID used for the transfer + // bits 31:26 unused/reserved + automatic logic [63:0] result = '0; + automatic logic [31:0] flags = '0; + + flags[0] = stim.do_irq; + flags[2:1] = stim.burst.opt.src.burst; + flags[4:3] = stim.burst.opt.dst.burst; + flags[5] = stim.burst.opt.beo.decouple_rw; + flags[6] = 1'b0; + // flags[6] = stim.burst.opt.beo.serialize; + flags[7] = stim.burst.opt.beo.src_reduce_len; + flags[11:8] = stim.burst.opt.src.cache; + flags[15:12] = stim.burst.opt.dst.cache; + flags[23:16] = stim.burst.opt.axi_id; + flags[31:26] = '0; + + result[31:0] = stim.burst.length; + result[63:32] = flags; + return result; + endfunction + + task axi_master_acquire_ars(); + @(posedge rst_n); + forever begin + automatic 
ax_beat_t ar_beat; + automatic result_t current_result; + // monitor ar + i_axi_iface_driver.mon_ar(ar_beat); + // and record contents + current_result.read_address = ar_beat.ax_addr; + current_result.read_length = ar_beat.ax_len; + current_result.read_size = ar_beat.ax_size; + ar_seen_result.push_back(current_result); + end + endtask : axi_master_acquire_ars + + task axi_master_acquire_rs(); + @(posedge rst_n); + forever begin + automatic r_beat_t r_beat; + automatic result_t current_result; + wait (ar_seen_result.size() > 0); + current_result = ar_seen_result.pop_front(); + i_axi_iface_driver.mon_r(r_beat); + if ($isunknown(r_beat.r_data)) begin + // drop current result + // as it is a prefetched one + end else begin + inflight_results_after_reads.push_back(current_result); + end + // four reads per descriptor in the 64-bit case + i_axi_iface_driver.mon_r(r_beat); + i_axi_iface_driver.mon_r(r_beat); + i_axi_iface_driver.mon_r(r_beat); + if (!r_beat.r_last) begin + $error("R acquisition has come out-of-sync."); + end + end + endtask : axi_master_acquire_rs + + task axi_master_acquire_aw(); + // set to one to skip first submission of what would be an invalid result + automatic result_t current_result; + @(posedge rst_n); + forever begin + automatic ax_beat_t aw_beat; + i_axi_iface_driver.mon_aw(aw_beat); + + wait (inflight_results_submitted_to_be.size() > 0); + current_result = inflight_results_submitted_to_be.pop_front(); + current_result.write_address = aw_beat.ax_addr; + current_result.write_length = aw_beat.ax_len; + current_result.write_size = aw_beat.ax_size; + aw_seen_result.push_back(current_result); + end + endtask + + task axi_master_acquire_w(); + automatic result_t current_result; + @(posedge rst_n); + forever begin + automatic w_beat_t w_beat; + i_axi_iface_driver.mon_w(w_beat); + wait (aw_seen_result.size() > 0); + current_result = aw_seen_result.pop_front(); + current_result.write_data = w_beat.w_data; + w_seen_result.push_back(current_result); + end 
+ endtask : axi_master_acquire_w + + task axi_master_acquire_irqs(); + automatic result_t current_result; + @(posedge rst_n); + forever begin + automatic b_beat_t b_beat; + automatic result_t current_result; + + // HACK: I'm taking advantage of the knowledge that the irq and + // B happen in the same cycle + i_axi_iface_driver.mon_b(b_beat); + wait(w_seen_result.size() > 0); + current_result = w_seen_result.pop_front(); + current_result.did_irq = irq; + result_queue.push_back(current_result); + end + endtask : axi_master_acquire_irqs + + task acquire_bursts(); + automatic result_t current_result; + automatic idma_req_t current_burst; + @(posedge rst_n); + forever begin + forever begin + @(posedge clk); + #(ACQ_DELAY); + if (dma_be_req_valid && dma_be_req_ready) break; + end + current_burst = dma_be_req; + wait (inflight_results_after_reads.size() > 0); + current_result = inflight_results_after_reads.pop_front(); + current_result.burst = current_burst; + inflight_results_submitted_to_be.push_back(current_result); + end + endtask + + // score the results + initial begin : proc_scoring + static logic finished_simulation = 1'b0; + + static int number_of_descriptors = 0; + static int read_addr_errors = 0; + static int read_length_errors = 0; + static int read_size_errors = 0; + static int write_addr_errors = 0; + static int write_length_errors = 0; + static int write_data_errors = 0; + static int write_size_errors = 0; + static int burst_errors = 0; + static int irq_errors = 0; + + generate_stimuli(); + load_descriptors_into_memory(); + + fork + apply_stimuli(); + collect_responses(); + begin : watchdog + @(posedge rst_n); + repeat (SimulationTimeoutCycles) begin + @(posedge clk); + end + end : watchdog + begin : scorer + @(posedge rst_n); + + while (golden_queue.size() > '0) begin + automatic result_t golden; + automatic result_t actual; + wait (result_queue.size() > 0); + golden = golden_queue.pop_front(); + actual = result_queue.pop_front(); + if (golden.burst !== 
actual.burst) begin + $error("Burst mismatch @ %d:\ngolden: %p\nactual: %p", + number_of_descriptors, golden.burst, actual.burst); + ++burst_errors; + end + if (golden.read_address !== actual.read_address) begin + $error("Read address mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_address, actual.read_address); + ++read_addr_errors; + end + if (golden.read_length !== actual.read_length) begin + $error("Read length mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_length, actual.read_length); + ++read_length_errors; + end + if (golden.read_size !== actual.read_size) begin + $error("Read size mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_size, actual.read_size); + ++read_size_errors; + end + if (golden.write_address !== actual.write_address) begin + $error("Write address mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_address, actual.write_address); + ++write_addr_errors; + end + if (golden.write_length !== actual.write_length) begin + $error("Write length mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_length, actual.write_length); + ++write_length_errors; + end + if (golden.write_size !== actual.write_size) begin + $error("Write size mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_size, actual.write_size); + ++write_size_errors; + end + if (golden.write_data !== actual.write_data) begin + $error("Write data mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_data, actual.write_data); + ++write_data_errors; + end + if (golden.did_irq !== actual.did_irq) begin + $error("IRQ mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.did_irq, actual.did_irq); + ++irq_errors; + end + ++number_of_descriptors; + end + // wait for frontend to signal no longer busy + forever begin + automatic logic [63:0] status; + automatic logic error; + 
i_reg_iface_driver.send_read( + .addr(IDMA_DESC64_STATUS_OFFSET), + .data(status), + .error(error) + ); + if (status[0] != 1'b1) break; + end + finished_simulation = 1'b1; + end : scorer + join_any + disable fork; + if (!finished_simulation) begin + $error("Simulation timed out."); + end else begin + $display("Simulation finished in a timely manner."); + end + $display("Saw %d descriptors." , number_of_descriptors); + $display("Read address errors: %d", read_addr_errors); + $display("Read length errors: %d", read_length_errors); + $display("Read size errors: %d", read_size_errors); + $display("Write address errors: %d", write_addr_errors); + $display("Write length errors: %d", write_length_errors); + $display("Write size errors: %d", write_size_errors); + $display("Write data errors: %d", write_data_errors); + $display("Burst errors: %d", burst_errors); + $display("IRQ errors: %d", irq_errors); + $finish(); + end : proc_scoring +endmodule : tb_idma_desc64_bench diff --git a/test/frontend/tb_idma_desc64_top.sv b/test/frontend/tb_idma_desc64_top.sv new file mode 100644 index 00000000..40ac3528 --- /dev/null +++ b/test/frontend/tb_idma_desc64_top.sv @@ -0,0 +1,720 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 + +// Axel Vanoni + +`include "register_interface/typedef.svh" +`include "register_interface/assign.svh" +`include "idma/typedef.svh" +`include "axi/typedef.svh" +`include "axi/assign.svh" + + + +/// VIP for the descriptor-based frontend +module tb_idma_desc64_top + import idma_desc64_reg_pkg::IDMA_DESC64_DESC_ADDR_OFFSET; + import idma_desc64_reg_pkg::IDMA_DESC64_STATUS_OFFSET; + import rand_verif_pkg::rand_wait; + import axi_pkg::*; + import reg_test::reg_driver; #( + parameter integer NumberOfTests = 100, + parameter integer SimulationTimeoutCycles = 100000, + parameter integer ChainedDescriptors = -1, + parameter int unsigned MaxChainedDescriptors = 10, + parameter int unsigned MinChainedDescriptors = 1, + parameter integer TransferLength = 1024, + parameter integer AlignmentMask = 'h0f, + parameter integer NumContiguous = 200000, + parameter integer MaxAxInFlight = 64, + parameter bit DoIRQ = 1, + parameter integer TransfersToSkip = 4, + // from frontend + parameter int unsigned InputFifoDepth = 8, + parameter int unsigned PendingFifoDepth = 8, + parameter int unsigned NSpeculation = 4, + parameter int unsigned BackendDepth = 5, + parameter int unsigned MaxAWWPending = 8, + parameter int unsigned Seed = 1337 +) (); + localparam time PERIOD = 10ns; + localparam time APPL_DELAY = PERIOD / 4; + localparam time ACQ_DELAY = PERIOD * 3 / 4; + + localparam integer RESET_CYCLES = 10; + + typedef logic [63:0] addr_t; + typedef logic [ 2:0] axi_id_t; + typedef axi_test::axi_ax_beat #(.AW(64), .IW(3), .UW(1)) ax_beat_t; + typedef axi_test::axi_r_beat #(.DW(64), .IW(3), .UW(1)) r_beat_t; + typedef axi_test::axi_w_beat #(.DW(64), .UW(1)) w_beat_t; + typedef axi_test::axi_b_beat #(.IW(3), .UW(1)) b_beat_t; + + `REG_BUS_TYPEDEF_ALL(reg, /* addr */ addr_t, /* data */ logic [63:0], /* strobe */ logic [7:0]) + `AXI_TYPEDEF_ALL(axi, /* addr */ addr_t, /* id */ axi_id_t, /* data */ logic [63:0], /* strb */ logic [7:0], /* user */ logic [0:0]) 
+ + // iDMA struct definitions + localparam int unsigned TFLenWidth = 32; + typedef logic [TFLenWidth-1:0] tf_len_t; + + // iDMA request / response types + `IDMA_TYPEDEF_FULL_REQ_T(idma_req_t, axi_id_t, addr_t, tf_len_t) + `IDMA_TYPEDEF_FULL_RSP_T(idma_rsp_t, addr_t) + + // set seed + initial begin + $urandom(Seed); + end + + class stimulus_t; + rand addr_t base; + rand idma_req_t burst; + rand logic do_irq; + addr_t next = 64'hffff_ffff_ffff_ffff; + + // an entire descriptor of 4 words must fit before the end of memory + constraint descriptor_fits_in_memory { (64'hffff_ffff_ffff_ffff - base) > 64'd32; } + constraint descriptor_is_in_descriptor_area { base > 64'h0000_ffff_ffff_ffff; } + constraint descriptor_is_aligned { (base & 64'hf) == 0; } + constraint no_empty_transfers { burst.length > '0; } + constraint src_fits_in_memory { 64'hffff_ffff_ffff_ffff - burst.src_addr > burst.length; } + constraint dst_fits_in_memory { 64'hffff_ffff_ffff_ffff - burst.dst_addr > burst.length; } + constraint src_burst_valid { burst.opt.src.burst inside { BURST_INCR, BURST_WRAP, BURST_FIXED }; } + constraint dst_burst_valid { burst.opt.dst.burst inside { BURST_INCR, BURST_WRAP, BURST_FIXED }; } + constraint src_is_not_in_descriptor_area { 64'h0000_ffff_ffff_ffff > (burst.src_addr + burst.length); } + constraint dst_is_not_in_descriptor_area { 64'h0000_ffff_ffff_ffff > (burst.dst_addr + burst.length); } + constraint src_aligned { (burst.src_addr & AlignmentMask) == 64'b0; } + constraint dst_aligned { (burst.dst_addr & AlignmentMask) == 64'b0; } + constraint reduce_len_equal { burst.opt.beo.src_reduce_len == burst.opt.beo.dst_reduce_len; } + constraint reduce_len_zero { burst.opt.beo.src_reduce_len == 1'b0; } + constraint beo_zero { burst.opt.beo.decouple_aw == '0 && burst.opt.beo.src_max_llen == '0 && burst.opt.beo.dst_max_llen == '0 && burst.opt.last == '0 && burst.opt.beo.decouple_rw == '0; } + constraint axi_params_zero_src { burst.opt.src.lock == '0 && burst.opt.src.prot == '0 
&& burst.opt.src.qos == '0 && burst.opt.src.region == '0; } + constraint axi_params_zero_dst { burst.opt.dst.lock == '0 && burst.opt.dst.prot == '0 && burst.opt.dst.qos == '0 && burst.opt.dst.region == '0; } + constraint axi_src_cache_zero { burst.opt.src.cache == '0; } + constraint axi_dst_cache_zero { burst.opt.dst.cache == '0; } + constraint transfer_length { burst.length == TransferLength; } + endclass + + typedef struct { + idma_req_t burst; + addr_t read_address; + logic [7:0] read_length; + logic [2:0] read_size; + addr_t write_address; + logic [7:0] write_length; + logic [2:0] write_size; + logic [63:0] write_data; + logic did_irq; + } result_t; + result_t golden_queue[$]; + + // clocks + logic clk; + logic rst_n; + + clk_rst_gen #( + .ClkPeriod(PERIOD), + .RstClkCycles(RESET_CYCLES) + ) i_clock_reset_generator ( + .clk_o (clk) , + .rst_no(rst_n) + ); + + // dut signals and module + REG_BUS #( + .ADDR_WIDTH(64), + .DATA_WIDTH(64) + ) i_reg_iface_bus (clk); + + reg_driver #( + .AW(64), + .DW(64), + .TA(APPL_DELAY), + .TT(ACQ_DELAY) + ) i_reg_iface_driver = new (i_reg_iface_bus); + + axi_resp_t dma_master_response; + axi_req_t dma_master_request; + + AXI_BUS_DV #( + .AXI_ADDR_WIDTH(64), + .AXI_DATA_WIDTH(64), + .AXI_ID_WIDTH(3), + .AXI_USER_WIDTH(1) + ) i_axi_iface_bus (clk); + + axi_test::axi_driver #( + .AW(64), + .DW(64), + .IW(3), + .UW(1), + .TA(APPL_DELAY), + .TT(ACQ_DELAY) + ) i_axi_iface_driver = new (i_axi_iface_bus); + + reg_rsp_t dma_slave_response; + reg_req_t dma_slave_request; + + idma_req_t dma_be_req; + + logic backend_busy; + logic dma_be_req_valid; + logic dma_be_req_ready; + logic dma_be_rsp_valid; + logic dma_be_rsp_ready; + logic irq; + + idma_desc64_top #( + .AddrWidth (64), + .DataWidth (64), + .AxiIdWidth (3), + .idma_req_t (idma_req_t), + .idma_rsp_t (idma_rsp_t), + .axi_rsp_t (axi_resp_t), + .axi_req_t (axi_req_t), + .axi_ar_chan_t (axi_ar_chan_t), + .axi_r_chan_t (axi_r_chan_t), + .reg_rsp_t (reg_rsp_t), + .reg_req_t (reg_req_t), + 
.InputFifoDepth (InputFifoDepth), + .PendingFifoDepth(PendingFifoDepth), + .BackendDepth (BackendDepth), + .NSpeculation (NSpeculation) + ) i_dut ( + .clk_i (clk), + .rst_ni (rst_n), + .master_req_o (dma_master_request), + .master_rsp_i (dma_master_response), + .axi_ar_id_i (3'b111), + .axi_aw_id_i (3'b111), + .slave_req_i (dma_slave_request), + .slave_rsp_o (dma_slave_response), + .idma_req_o (dma_be_req), + .idma_req_valid_o(dma_be_req_valid), + .idma_req_ready_i(dma_be_req_ready), + .idma_rsp_i ('0), + .idma_rsp_valid_i(dma_be_rsp_valid), + .idma_rsp_ready_o(dma_be_rsp_ready), + .idma_busy_i (backend_busy), + .irq_o (irq) + ); + + // sim memory + axi_sim_mem #( + .AddrWidth ( 64 ), + .DataWidth ( 64 ), + .IdWidth (3 ), + .UserWidth (1 ), + .axi_req_t (axi_req_t ), + .axi_rsp_t (axi_resp_t), + .WarnUninitialized (1'b0 ), + .ClearErrOnAccess (1'b1 ), + .ApplDelay (APPL_DELAY), + .AcqDelay (ACQ_DELAY ) + ) i_axi_sim_mem ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .axi_req_i ( dma_master_request ), + .axi_rsp_o ( dma_master_response ) + ); + + assign dma_slave_request.addr = i_reg_iface_bus.addr; + assign dma_slave_request.write = i_reg_iface_bus.write; + assign dma_slave_request.wdata = i_reg_iface_bus.wdata; + assign dma_slave_request.wstrb = i_reg_iface_bus.wstrb; + assign dma_slave_request.valid = i_reg_iface_bus.valid; + assign i_reg_iface_bus.rdata = dma_slave_response.rdata; + assign i_reg_iface_bus.ready = dma_slave_response.ready; + assign i_reg_iface_bus.error = dma_slave_response.error; + + `AXI_ASSIGN_FROM_REQ(i_axi_iface_bus, dma_master_request); + `AXI_ASSIGN_FROM_RESP(i_axi_iface_bus, dma_master_response); + + initial begin + dma_be_rsp_valid = 1'b0; + dma_be_req_ready = 1'b0; + backend_busy = 1'b0; + end + + // queues for communication and data transfer + stimulus_t generated_stimuli[$][$]; + stimulus_t inflight_stimuli[$][$]; + result_t ar_seen_result[$]; + result_t inflight_results_after_reads[$]; + result_t inflight_results_submitted_to_be[$]; + 
result_t aw_seen_result[$]; + result_t w_seen_result[$]; + result_t result_queue[$]; + + function automatic void generate_stimuli(); + repeat (NumberOfTests) begin + automatic stimulus_t current_stimulus; + automatic stimulus_t current_stimuli_group[$]; + automatic int number_of_descriptors_in_test; + + if (ChainedDescriptors < 0) begin + void'(std::randomize(number_of_descriptors_in_test) with { + number_of_descriptors_in_test >= MinChainedDescriptors; + number_of_descriptors_in_test <= MaxChainedDescriptors; + }); + end else begin + number_of_descriptors_in_test = ChainedDescriptors; + end + + current_stimulus = new(); + if (!current_stimulus.randomize()) begin + $error("Couldn't randomize stimulus"); + end else begin + + // overwrite protocols + current_stimulus.burst.opt.src_protocol = idma_pkg::AXI; + current_stimulus.burst.opt.dst_protocol = idma_pkg::AXI; + + current_stimuli_group.push_back(current_stimulus); + golden_queue.push_back('{ + burst: current_stimulus.burst, + + read_address: current_stimulus.base, + // axi length 3 is 4 transfers (+1) + read_length: 'd3, + // 2^3 = 8 bytes in a transfer + read_size: 'b011, + + write_address: current_stimulus.base, + // axi length 0 is 1 transfer (+1) + write_length: 8'b0, + // 2^3 = 8 bytes in a transfer + write_size: 3'b011, + write_data: 64'hffff_ffff_ffff_ffff, + + did_irq: current_stimulus.do_irq + }); + end + + repeat (number_of_descriptors_in_test - 1) begin + current_stimulus = new(); + if (!current_stimulus.randomize()) begin + $error("Couldn't randomize stimulus"); + end else begin + + // overwrite protocols + current_stimulus.burst.opt.src_protocol = idma_pkg::AXI; + current_stimulus.burst.opt.dst_protocol = idma_pkg::AXI; + + // chain descriptor + current_stimuli_group[$].next = current_stimulus.base; + + current_stimuli_group.push_back(current_stimulus); + + golden_queue.push_back('{ + burst: current_stimulus.burst, + + read_address: current_stimulus.base, + // axi length 3 is 4 transfers (+1) + 
read_length: 'd3, + // 2^3 = 8 bytes in a transfer + read_size: 'b011, + + write_address: current_stimulus.base, + // axi length 0 is 1 transfer (+1) + write_length: 8'b0, + // 2^3 = 8 bytes in a transfer + write_size: 3'b011, + write_data: 64'hffff_ffff_ffff_ffff, + + did_irq: current_stimulus.do_irq + }); + end + end + generated_stimuli.push_back(current_stimuli_group); + end + // make the last stimulus generate an irq to simplify the IRQ + // acquisition + generated_stimuli[$][$].do_irq = 1'b1; + golden_queue[$].did_irq = 1'b1; + endfunction : generate_stimuli + + function automatic void write_mem_64(addr_t base, logic[63:0] data); + i_axi_sim_mem.mem[base] = data[ 7: 0]; + i_axi_sim_mem.mem[base + 1] = data[15: 8]; + i_axi_sim_mem.mem[base + 2] = data[23:16]; + i_axi_sim_mem.mem[base + 3] = data[31:24]; + i_axi_sim_mem.mem[base + 4] = data[39:32]; + i_axi_sim_mem.mem[base + 5] = data[47:40]; + i_axi_sim_mem.mem[base + 6] = data[55:48]; + i_axi_sim_mem.mem[base + 7] = data[63:56]; + endfunction : write_mem_64 + + function automatic void load_descriptors_into_memory(); + $display("Loading descriptors"); + foreach (generated_stimuli[i]) begin + foreach (generated_stimuli[i][j]) begin + automatic addr_t base = generated_stimuli[i][j].base; + write_mem_64(base, stimulus_to_flag_bits(generated_stimuli[i][j])); + if (j == (generated_stimuli[i].size() - 1)) begin + write_mem_64(base + 64'h8, 64'hffff_ffff_ffff_ffff); + end else begin + write_mem_64(base + 64'h8, generated_stimuli[i][j+1].base); + end + write_mem_64(base + 64'h10, generated_stimuli[i][j].burst.src_addr); + write_mem_64(base + 64'h18, generated_stimuli[i][j].burst.dst_addr); + end + end + endfunction : load_descriptors_into_memory + + task apply_stimuli(); + fork + regbus_slave_interaction(); + backend_tx_done_notifier(); + backend_acceptor(); + join + endtask + + task collect_responses(); + fork + axi_master_acquire_ars(); + axi_master_acquire_rs(); + axi_master_acquire_aw_w_and_irqs(); + 
acquire_bursts(); + join + endtask + + // regbus slave interaction (we're acting as master) + task regbus_slave_interaction(); + automatic stimulus_t current_stimulus_group[$]; + i_reg_iface_driver.reset_master(); + @(posedge rst_n); + + forever begin + automatic logic [63:0] status; + automatic addr_t start_addr; + automatic logic error; + + wait (generated_stimuli.size() > '0); + current_stimulus_group = generated_stimuli.pop_front(); + + i_reg_iface_driver.send_write( + .addr (IDMA_DESC64_DESC_ADDR_OFFSET) , + .data (current_stimulus_group[0].base), + .strb (8'hff) , + .error(error) + ); + inflight_stimuli.push_back(current_stimulus_group); + end + endtask + + function automatic logic [63:0] stimulus_to_flag_bits(stimulus_t stim); + // Copied from frontend: + // bit 0 set to trigger an irq on completion, unset to not be notified + // bits 2:1 burst type for source, fixed: 00, incr: 01, wrap: 10 + // bits 4:3 burst type for destination, fixed: 00, incr: 01, wrap: 10 + // for a description of these modes, check AXI-Pulp documentation + // bit 5 set to decouple reads and writes in the backend + // bit 6 set to serialize requests. 
Not setting might violate AXI spec + // bit 7 set to deburst (each burst is split into own transfer) + // for a more thorough description, refer to the iDMA backend documentation + // bits 11:8 Bitfield for AXI cache attributes for the source + // bits 15:12 Bitfield for AXI cache attributes for the destination + // bits of the bitfield (refer to AXI-Pulp for a description): + // bit 0: cache bufferable + // bit 1: cache modifiable + // bit 2: cache read alloc + // bit 3: cache write alloc + // bits 23:16 AXI ID used for the transfer + // bits 31:26 unused/reserved + automatic logic [63:0] result = '0; + automatic logic [31:0] flags = '0; + + flags[0] = stim.do_irq; + flags[2:1] = stim.burst.opt.src.burst; + flags[4:3] = stim.burst.opt.dst.burst; + flags[5] = stim.burst.opt.beo.decouple_rw; + flags[6] = 1'b0; + // flags[6] = stim.burst.opt.beo.serialize; + flags[7] = stim.burst.opt.beo.src_reduce_len; + flags[11:8] = stim.burst.opt.src.cache; + flags[15:12] = stim.burst.opt.dst.cache; + flags[23:16] = stim.burst.opt.axi_id; + flags[31:26] = '0; + + result[31:0] = stim.burst.length; + result[63:32] = flags; + return result; + endfunction + + task axi_master_acquire_ars(); + @(posedge rst_n); + forever begin + automatic ax_beat_t ar_beat; + automatic result_t current_result; + // monitor ar + i_axi_iface_driver.mon_ar(ar_beat); + // and record contents + current_result.read_address = ar_beat.ax_addr; + current_result.read_length = ar_beat.ax_len; + current_result.read_size = ar_beat.ax_size; + ar_seen_result.push_back(current_result); + end + endtask : axi_master_acquire_ars + + task axi_master_acquire_rs(); + @(posedge rst_n); + forever begin + automatic r_beat_t r_beat; + automatic result_t current_result; + wait (ar_seen_result.size() > 0); + current_result = ar_seen_result.pop_front(); + i_axi_iface_driver.mon_r(r_beat); + if ($isunknown(r_beat.r_data)) begin + // drop current result + // as it is a prefetched one + end else begin + 
inflight_results_after_reads.push_back(current_result); + end + // four reads per descriptor in the 64-bit case + i_axi_iface_driver.mon_r(r_beat); + i_axi_iface_driver.mon_r(r_beat); + i_axi_iface_driver.mon_r(r_beat); + if (!r_beat.r_last) begin + $error("R acquisition has come out-of-sync."); + end + end + endtask : axi_master_acquire_rs + + task axi_master_acquire_aw_w_and_irqs(); + fork + axi_master_acquire_aw(); + axi_master_acquire_w(); + axi_master_acquire_irqs(); + join + endtask : axi_master_acquire_aw_w_and_irqs + + task axi_master_acquire_aw(); + // set to one to skip first submission of what would be an invalid result + automatic result_t current_result; + @(posedge rst_n); + forever begin + automatic ax_beat_t aw_beat; + i_axi_iface_driver.mon_aw(aw_beat); + + wait (inflight_results_submitted_to_be.size() > 0); + current_result = inflight_results_submitted_to_be.pop_front(); + current_result.write_address = aw_beat.ax_addr; + current_result.write_length = aw_beat.ax_len; + current_result.write_size = aw_beat.ax_size; + aw_seen_result.push_back(current_result); + end + endtask + task axi_master_acquire_w(); + automatic result_t current_result; + @(posedge rst_n); + forever begin + automatic w_beat_t w_beat; + i_axi_iface_driver.mon_w(w_beat); + wait (aw_seen_result.size() > 0); + current_result = aw_seen_result.pop_front(); + current_result.write_data = w_beat.w_data; + w_seen_result.push_back(current_result); + end + endtask : axi_master_acquire_w + task axi_master_acquire_irqs(); + automatic result_t current_result; + @(posedge rst_n); + forever begin + automatic b_beat_t b_beat; + automatic result_t current_result; + + // HACK: I'm taking advantage of the knowledge that the irq and + // B happen in the same cycle + i_axi_iface_driver.mon_b(b_beat); + wait(w_seen_result.size() > 0); + current_result = w_seen_result.pop_front(); + current_result.did_irq = irq; + result_queue.push_back(current_result); + end + endtask : axi_master_acquire_irqs + + task 
backend_tx_done_notifier(); + automatic int unsigned rand_success, cycles; + @(posedge rst_n); + forever begin + wait (backend_busy); + + /* EXPAND RAND_WAIT FROM COMMON_VERIF_PKG */ + rand_success = randomize(cycles) with { + cycles >= 5; + cycles <= 10; + }; + assert (rand_success) else $error("Failed to randomize wait cycles!"); + repeat (cycles) @(posedge clk); + /* END EXPAND RAND_WAIT FROM COMMON_VERIF_PKG */ + + #(APPL_DELAY); + dma_be_rsp_valid = 1'b1; + wait (dma_be_rsp_ready); + + @(posedge clk); + #(APPL_DELAY); + dma_be_rsp_valid = 1'b0; + backend_busy = 1'b0; + end + endtask + + task acquire_bursts(); + automatic result_t current_result; + automatic idma_req_t current_burst; + @(posedge rst_n); + forever begin + forever begin + @(posedge clk); + #(ACQ_DELAY); + if (dma_be_req_valid && dma_be_req_ready) break; + end + current_burst = dma_be_req; + wait (inflight_results_after_reads.size() > 0); + current_result = inflight_results_after_reads.pop_front(); + current_result.burst = current_burst; + inflight_results_submitted_to_be.push_back(current_result); + end + endtask + + task backend_acceptor(); + @(posedge rst_n); + forever begin + wait (!backend_busy); + @(posedge clk); + #(APPL_DELAY) + dma_be_req_ready = 1'b1; + #(ACQ_DELAY - APPL_DELAY); + forever begin + if (dma_be_req_valid) begin + break; + end + @(posedge clk); + #(ACQ_DELAY); + end + @(posedge clk); + #(APPL_DELAY) + dma_be_req_ready = 1'b0; + backend_busy = 1'b1; + end + endtask + + // score the results + initial begin : proc_scoring + static logic finished_simulation = 1'b0; + + static int number_of_descriptors = 0; + static int read_addr_errors = 0; + static int read_length_errors = 0; + static int read_size_errors = 0; + static int write_addr_errors = 0; + static int write_length_errors = 0; + static int write_data_errors = 0; + static int write_size_errors = 0; + static int burst_errors = 0; + static int irq_errors = 0; + + generate_stimuli(); + load_descriptors_into_memory(); + + fork 
+ apply_stimuli(); + collect_responses(); + begin : watchdog + @(posedge rst_n); + repeat (SimulationTimeoutCycles) begin + @(posedge clk); + end + end : watchdog + begin : scorer + @(posedge rst_n); + + while (golden_queue.size() > '0) begin + automatic result_t golden; + automatic result_t actual; + wait (result_queue.size() > 0); + golden = golden_queue.pop_front(); + actual = result_queue.pop_front(); + if (golden.burst !== actual.burst) begin + $error("Burst mismatch @ %d:\ngolden: %p\nactual: %p", + number_of_descriptors, golden.burst, actual.burst); + ++burst_errors; + end + if (golden.read_address !== actual.read_address) begin + $error("Read address mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_address, actual.read_address); + ++read_addr_errors; + end + if (golden.read_length !== actual.read_length) begin + $error("Read length mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_length, actual.read_length); + ++read_length_errors; + end + if (golden.read_size !== actual.read_size) begin + $error("Read size mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.read_size, actual.read_size); + ++read_size_errors; + end + if (golden.write_address !== actual.write_address) begin + $error("Write address mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_address, actual.write_address); + ++write_addr_errors; + end + if (golden.write_length !== actual.write_length) begin + $error("Write length mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_length, actual.write_length); + ++write_length_errors; + end + if (golden.write_size !== actual.write_size) begin + $error("Write size mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.write_size, actual.write_size); + ++write_size_errors; + end + if (golden.write_data !== actual.write_data) begin + $error("Write data mismatch @ %d:\ngolden: %x\nactual: %x", + 
number_of_descriptors, golden.write_data, actual.write_data); + ++write_data_errors; + end + if (golden.did_irq !== actual.did_irq) begin + $error("IRQ mismatch @ %d:\ngolden: %x\nactual: %x", + number_of_descriptors, golden.did_irq, actual.did_irq); + ++irq_errors; + end + ++number_of_descriptors; + end + // wait for frontend to signal no longer busy + forever begin + automatic logic [63:0] status; + automatic logic error; + i_reg_iface_driver.send_read( + .addr(IDMA_DESC64_STATUS_OFFSET), + .data(status), + .error(error) + ); + if (status[0] != 1'b1) break; + end + finished_simulation = 1'b1; + end : scorer + join_any + disable fork; + if (!finished_simulation) begin + $error("Simulation timed out."); + end else begin + $display("Simulation finished in a timely manner."); + end + $display("Saw %d descriptors." , number_of_descriptors); + $display("Read address errors: %d", read_addr_errors); + $display("Read length errors: %d", read_length_errors); + $display("Read size errors: %d", read_size_errors); + $display("Write address errors: %d", write_addr_errors); + $display("Write length errors: %d", write_length_errors); + $display("Write size errors: %d", write_size_errors); + $display("Write data errors: %d", write_data_errors); + $display("Burst errors: %d", burst_errors); + $display("IRQ errors: %d", irq_errors); + $finish(); + end : proc_scoring + +endmodule : tb_idma_desc64_top diff --git a/util/trace_idma.py b/util/trace_idma.py new file mode 100644 index 00000000..ad57b213 --- /dev/null +++ b/util/trace_idma.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +# Copyright 2022 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. 
# SPDX-License-Identifier: SHL-0.51

# Author: Thomas Benz

"""Functions used to parse and evaluate iDMA trace files."""
import ast
import sys
from pprint import pprint as pp


def strb_to_bytes(strobe: int) -> int:
    """Return the number of valid bytes in a strobe value.

    Every bit set to one in the binary representation of ``strobe`` is
    counted as one valid byte.
    """

    return bin(strobe).count('1')


def read_trace(fn: str) -> list:
    """Read a trace file and return it as a list of trace events.

    Every non-blank line of the file is parsed as a Python literal with
    ``ast.literal_eval``; blank and whitespace-only lines (for example a
    trailing empty line) are skipped instead of aborting the parse.

    Args:
        fn: Path of the trace file to read.

    Returns:
        The parsed events in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError, SyntaxError: If a non-blank line is not a valid literal.
    """

    # resulting list of trace events
    trace = []

    # read and parse file
    with open(fn, 'r', encoding='utf8') as tf:
        for line in tf:
            event = line.strip()
            if not event:
                # nothing to parse on this line
                continue
            trace.append(ast.literal_eval(event))

    return trace


def extract_parameter(trace: list) -> dict:
    """Extract the parameters of the DMA backend the run resulted from.

    The parameters are taken from the ``'cnst'`` entry of the first trace
    event.

    Raises:
        IndexError: If ``trace`` is empty.
        KeyError: If the first event has no ``'cnst'`` entry.
    """

    return trace[0]['cnst']


def get_global_utilization(trace: list, data_width: int) -> list:
    """Calculate the global utilization ``[read, write]`` of the DMA.

    Each trace event contributes to the read total when both
    ``event['axib']['r_ready']`` and ``event['axib']['r_valid']`` are truthy,
    and to the write total when both ``w_ready`` and ``w_valid`` are truthy.
    Reads are credited with a full data word, writes with the number of
    bytes enabled in ``w_strb``.

    Args:
        trace: Trace events as returned by :func:`read_trace`.
        data_width: Width of the data bus in bits.

    Returns:
        A two-element list ``[read_utilization, write_utilization]``, each
        being the transferred bytes divided by the maximum number of bytes
        ``len(trace) * data_width // 8``. ``[0.0, 0.0]`` is returned when
        that maximum is zero (e.g. an empty trace), as no utilization can be
        derived from it.
    """

    bytes_per_beat = data_width // 8

    read_data = 0   # in bytes
    write_data = 0  # in bytes

    for ele in trace:
        axib = ele['axib']

        # add read contribution
        if axib['r_ready'] and axib['r_valid']:
            read_data += bytes_per_beat

        # add write contribution
        if axib['w_ready'] and axib['w_valid']:
            write_data += strb_to_bytes(axib['w_strb'])

    # calculate maximum possible amount of data
    max_data = len(trace) * data_width // 8

    # guard the division: without any capacity there is nothing to utilize
    if max_data == 0:
        return [0.0, 0.0]

    return [read_data / max_data, write_data / max_data]


def main(argv: list) -> int:
    """Print the global utilization of the trace file named in ``argv``.

    Args:
        argv: Command line in ``sys.argv`` layout: program name followed by
            exactly one trace file path.

    Returns:
        ``0`` on success, ``2`` if the command line is malformed.
    """

    if len(argv) != 2:
        prog = argv[0] if argv else 'trace_idma.py'
        print(f'usage: {prog} <trace-file>', file=sys.stderr)
        return 2

    idma_trace = read_trace(argv[1])
    idma_data_width = extract_parameter(idma_trace)['data_width']
    pp(get_global_utilization(idma_trace, idma_data_width))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))