diff --git a/Makefile b/Makefile index f1685fa1e79..f1e832ec22a 100644 --- a/Makefile +++ b/Makefile @@ -47,6 +47,8 @@ $(warning must set CVA6_REPO_DIR to point at the root of CVA6 sources -- doing i export CVA6_REPO_DIR = $(abspath $(root-dir)) endif +export HPDCACHE_DIR = $(CVA6_REPO_DIR)/vendor/openhwgroup/cvhpdcache + support_verilator_4 := $(shell ($(verilator) --version | grep '4\.') > /dev/null 2>&1 ; echo $$?) ifeq ($(support_verilator_4), 0) verilator_threads := 1 diff --git a/core/Flist.cva6 b/core/Flist.cva6 index d4211c9060e..44dde45a4f6 100644 --- a/core/Flist.cva6 +++ b/core/Flist.cva6 @@ -154,6 +154,7 @@ ${CVA6_REPO_DIR}/core/cache_subsystem/cache_ctrl.sv ${CVA6_REPO_DIR}/core/cache_subsystem/cva6_icache_axi_wrapper.sv ${CVA6_REPO_DIR}/core/cache_subsystem/std_cache_subsystem.sv ${CVA6_REPO_DIR}/core/cache_subsystem/std_nbdcache.sv +-F ${CVA6_REPO_DIR}/vendor/openhwgroup/cvhpdcache/rtl/hpdcache_cva6.Flist // Physical Memory Protection // NOTE: pmp.sv modified for DSIM (unchanged for other simulators) diff --git a/vendor/openhwgroup/cvhpdcache/.gitignore b/vendor/openhwgroup/cvhpdcache/.gitignore new file mode 100644 index 00000000000..599abb88196 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/.gitignore @@ -0,0 +1,6 @@ +vnc_logs/ +veloce.map +VRMDATA/ +*.log +*.wlf +*.ucdb diff --git a/vendor/openhwgroup/cvhpdcache/CHANGELOG.md b/vendor/openhwgroup/cvhpdcache/CHANGELOG.md new file mode 100644 index 00000000000..1ce8d2fc85e --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/CHANGELOG.md @@ -0,0 +1,80 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +### Removed + +### Changed + +### Fixed + +## [3.0.0] 2023-10-08 + +### Added + +- Add support for virtually-indexed addressing + +### Fixed + +- Fix forwarding logic of uncacheable Icache response in the cva6 cache subsystem. +- Fix wrong mask signal when implementing the MSHR in registers + +## [2.1.0] - 2023-09-25 + +### Added + +- Add additional configuration to implement MSHR in registers (when the number + of entries is low) + +### Fixed + +- Fix cache data SRAM chip-select generation when word width is different than + 64 bits (e.g. 32 bits) + +## [2.0.0] - 2023-09-18 + +### Added + +- Add parameters in the HPDcache module to define the types of interfaces to + the memory +- Add helper verilog header file with macros to ease the type definition of + interfaces to the memory +- Add new event signals in the HPDCache top module +- Add generic single-port RAM macros with byte-enable signals +- Add parameters in the package to choose between RAM macros implementing + byte-enable or bitmask for the different RAMs instances +- Add additional assertions to verify parameters +- Add additional configuration signal to inhibit write coalescing in the write + buffer + +### Removed + +- Remove base_id ports in the HPDCache top module +- Remove nettype (wire,var) in ports as it looks like is badly supported in + some cases by some simulation tools + +### Changed + +- Split the hpdcache_pkg into: (1) the hpdcache_pkg contains internally defined + parameters; (2) a new hpdcache_params_pkg that defines user parameters +- New selection policy of ready requests in the replay table. It gives priority + to requests in the same linked list. 
+- The write buffer now accepts writes from requesters in a pending slot when it + is waiting for the internal arbiter to forward the data to the NoC. + +### Fixed + +- Correctly support HPDCACHE_ACCESS_WORDS=1 +- Correctly support HPDCACHE_ACCESS_WORDS=HPDCACHE_CL_WORDS +- Fix width of the nlines count register in the HW memory prefetcher. + +## [1.0.0] - 2023-02-22 + +### Added +- Initial release to the OpenHW Group diff --git a/vendor/openhwgroup/cvhpdcache/CODEOWNERS b/vendor/openhwgroup/cvhpdcache/CODEOWNERS new file mode 100644 index 00000000000..ce692f92417 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/CODEOWNERS @@ -0,0 +1,2 @@ +# Global Owners +* @cfuguet diff --git a/vendor/openhwgroup/cvhpdcache/LICENSE b/vendor/openhwgroup/cvhpdcache/LICENSE new file mode 100644 index 00000000000..279d97adb07 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/LICENSE @@ -0,0 +1,97 @@ +Solderpad Hardware License v2.1 + +This license operates as a wraparound license to the Apache License +Version 2.0 (the “Apache License”) and incorporates the terms and +conditions of the Apache License (which can be found here: +http://apache.org/licenses/LICENSE-2.0), with the following additions and +modifications. It must be read in conjunction with the Apache License. +Section 1 below modifies definitions and terminology in the Apache +License and Section 2 below replaces Section 2 of the Apache License. +The Appendix replaces the Appendix in the Apache License. You may, at +your option, choose to treat any Work released under this license as +released under the Apache License (thus ignoring all sections written +below entirely). + +1. Terminology in the Apache License is supplemented or modified as +follows: + +“Authorship”: any reference to ‘authorship’ shall be taken to read +“authorship or design”. + +“Copyright owner”: any reference to ‘copyright owner’ shall be taken to +read “Rights owner”. + +“Copyright statement”: the reference to ‘copyright statement’ shall be +taken to read ‘copyright or other statement pertaining to Rights’. + +The following new definition shall be added to the Definitions section of +the Apache License: + +“Rights” means copyright and any similar right including design right +(whether registered or unregistered), rights in semiconductor +topographies (mask works) and database rights (but excluding Patents and +Trademarks). + +The following definitions shall replace the corresponding definitions in +the Apache License: + +“License” shall mean this Solderpad Hardware License version 2.1, being +the terms and conditions for use, manufacture, instantiation, adaptation, +reproduction, and distribution as defined by Sections 1 through 9 of this +document. + +“Licensor” shall mean the owner of the Rights or entity authorized by the +owner of the Rights that is granting the License. + +“Derivative Works” shall mean any work, whether in Source or Object form, +that is based on (or derived from) the Work and for which the editorial +revisions, annotations, elaborations, or other modifications represent, +as a whole, an original work of authorship or design. For the purposes of +this License, Derivative Works shall not include works that remain +reversibly separable from, or merely link (or bind by name) or physically +connect to or interoperate with the Work and Derivative Works thereof. 
+ +“Object” form shall mean any form resulting from mechanical +transformation or translation of a Source form or the application of a +Source form to physical material, including but not limited to compiled +object code, generated documentation, the instantiation of a hardware +design or physical object or material and conversions to other media +types, including intermediate forms such as bytecodes, FPGA bitstreams, +moulds, artwork and semiconductor topographies (mask works). + +“Source” form shall mean the preferred form for making modifications, +including but not limited to source code, net lists, board layouts, CAD +files, documentation source, and configuration files. + +“Work” shall mean the work of authorship or design, whether in Source or +Object form, made available under the License, as indicated by a notice +relating to Rights that is included in or attached to the work (an +example is provided in the Appendix below). + +2. Grant of License. Subject to the terms and conditions of this License, +each Contributor hereby grants to You a perpetual, worldwide, +non-exclusive, no-charge, royalty-free, irrevocable license under the +Rights to reproduce, prepare Derivative Works of, make, adapt, repair, +publicly display, publicly perform, sublicense, and distribute the Work +and such Derivative Works in Source or Object form and do anything in +relation to the Work as if the Rights did not exist. + +APPENDIX + +Copyright 2023 CEA* +*Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + +SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +Licensed under the Solderpad Hardware License v 2.1 (the “License”); you +may not use this file except in compliance with the License, or, at your +option, the Apache License version 2.0. You may obtain a copy of the +License at + +https://solderpad.org/licenses/SHL-2.1/ + +Unless required by applicable law or agreed to in writing, any work +distributed under the License is distributed on an “AS IS” BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +License for the specific language governing permissions and limitations +under the License. diff --git a/vendor/openhwgroup/cvhpdcache/README.md b/vendor/openhwgroup/cvhpdcache/README.md new file mode 100644 index 00000000000..76f638ad8fd --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/README.md @@ -0,0 +1,45 @@ +# OpenHW Core-V High-Performance L1 Dcache (CV-HPDcache) + +The HPDcache is an open-source High-Performance, Multi-requester, Out-of-Order L1 Dcache for RISC-V cores and accelerators. + + +## Directory Structure + + + + + + + + + + + + + + + + + + + + + + +
+| Directory      | Description                                                                    |
+| -------------- | ------------------------------------------------------------------------------ |
+| rtl            | Contains the file lists to be used for the compiling of the HPDcache            |
+| rtl/src        | Contains the SystemVerilog RTL sources of the HPDcache                          |
+| rtl/src/target | Contains processor-dependent sources (e.g. adapter modules for the CVA6 core)   |
+| docs           | Contains documentation of the HPDcache                                          |
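+
+For example, the CVA6 integration in this patch pulls the HPDcache sources into
+its own compile file list through the file list provided in *rtl* (the path
+below reflects the vendored location used by CVA6):
+
+```
+-F ${CVA6_REPO_DIR}/vendor/openhwgroup/cvhpdcache/rtl/hpdcache_cva6.Flist
+```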
+ + +## Documentation + +The HPDcache specification document can be found in the *docs/hpdcache_spec_document* folder. +It is written in LaTeX. +You cand find pre-compiled PDF documents in *docs/hpdcache_spec_document/release*. + +If you need to recompile the specification document, a dedicated *Makefile* is in the specification folder. +This *Makefile* needs the *latexmk* command-line tool (included in most common LaTeX distributions) and the *inkscape* tool to convert SVG images into PDF. + + +## Licensing + +The HPDcache is released under the Solderpad Hardware License (version 2.1). +Please refer to the [LICENSE](LICENSE) file for further information. diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/.gitignore b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/.gitignore new file mode 100644 index 00000000000..4c96dcadd6a --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/.gitignore @@ -0,0 +1,3 @@ +build/ +pdf/ +supplement/package diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/Makefile b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/Makefile new file mode 100644 index 00000000000..a9fd0c4fe61 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/Makefile @@ -0,0 +1,122 @@ +## +## Copyright 2023 CEA* +## *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) +## +## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +## +## Licensed under the Solderpad Hardware License v 2.1 (the “License”); you +## may not use this file except in compliance with the License, or, at your +## option, the Apache License version 2.0. You may obtain a copy of the +## License at +## +## https://solderpad.org/licenses/SHL-2.1/ +## +## Unless required by applicable law or agreed to in writing, any work +## distributed under the License is distributed on an “AS IS” BASIS, WITHOUT +## WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +## License for the specific language governing permissions and limitations +## under the License. 
+## +## Author(s): Cesar Fuguet +## Date: February, 2023 +## Description: Makefile for the specification document of the HPDcache +## hardware IP +## +LATEXMK = latexmk -pdf +INKSCAPE = inkscape --without-gui +PDFVIEWER = evince +RM = rm -rf +RMDIR = rmdir -p +CP = cp -f +ECHO = echo -e +MKDIR = mkdir -p + +PDF_DIR = pdf +AUX_DIR = build +SOURCE_DIR = source +RELEASE_DIR = release + +VERSION = $(shell cat version) + +TARGET = hpdcache_spec +TEX_SOURCES = $(SOURCE_DIR)/$(TARGET).bib \ + $(SOURCE_DIR)/hpdcache_spec_changelog.tex \ + $(SOURCE_DIR)/hpdcache_spec_preamble.tex \ + $(AUX_DIR)/hpdcache_data_ram_organization.pdf \ + $(AUX_DIR)/hpdcache_request_arbiter.pdf \ + $(AUX_DIR)/hpdcache_core.pdf \ + $(AUX_DIR)/hpdcache_request_address_data_alignment.pdf \ + $(AUX_DIR)/hpdcache_data_ram_organization.emf \ + $(AUX_DIR)/hpdcache_request_arbiter.emf \ + $(AUX_DIR)/hpdcache_core.emf \ + $(AUX_DIR)/hpdcache_request_address_data_alignment.emf \ + $(AUX_DIR)/wave_ready_before_valid.pdf \ + $(AUX_DIR)/wave_valid_before_ready.pdf \ + $(AUX_DIR)/wave_ready_when_valid.pdf \ + $(AUX_DIR)/wave_back_to_back.pdf \ + $(AUX_DIR)/wave_ready_before_valid.emf \ + $(AUX_DIR)/wave_valid_before_ready.emf \ + $(AUX_DIR)/wave_ready_when_valid.emf \ + $(AUX_DIR)/wave_back_to_back.emf + +LATEXMK_ARGS = -bibtex \ + $(if $(VERBOSE),,-silent) \ + -pretex='\newcommand{\docversion}{$(VERSION)}' \ + -usepretex + +vpath %.svg $(SOURCE_DIR)/images +vpath %.svg $(SOURCE_DIR)/images/exported + +all: + @$(ECHO) "usage: make [target]\n" + @$(ECHO) "Target candidates:\n" + @$(ECHO) "pdf generate PDF document" + @$(ECHO) "release make a release of the PDF document" + @$(ECHO) "view view PDF document" + @$(ECHO) "clean clean auxiliary files" + @$(ECHO) "clean_pdf clean PDF file" + @$(ECHO) "distclean clean all generated files" + +.PHONY: release +release: $(RELEASE_DIR)/$(TARGET)-$(VERSION).pdf + +$(RELEASE_DIR)/$(TARGET)-$(VERSION).pdf: $(PDF_DIR)/$(TARGET).pdf + @$(MKDIR) $(dir $@) + $(CP) $< $@ + +.PHONY: pdf +pdf: $(PDF_DIR)/$(TARGET).pdf + +$(PDF_DIR)/$(TARGET).pdf: $(AUX_DIR)/$(TARGET).pdf + @$(MKDIR) $(dir $@) + $(CP) $< $@ + +$(AUX_DIR)/$(TARGET).pdf: $(SOURCE_DIR)/$(TARGET).tex \ + $(TEX_SOURCES) + @$(MKDIR) $(dir $@) + $(LATEXMK) $(LATEXMK_ARGS) --output-directory=$(dir $@) $< + +$(AUX_DIR)/%.pdf: %.svg + @$(MKDIR) $(dir $@) + $(INKSCAPE) --export-pdf=$@ --export-area-drawing $< + +$(AUX_DIR)/%.emf: %.svg + @$(MKDIR) $(dir $@) + $(INKSCAPE) --export-emf=$@ --export-area-drawing $< + +.PHONY: view +view: $(PDF_DIR)/$(TARGET).pdf + @$(ECHO) "Opening PDF viewer..." + @$(PDFVIEWER) $< >&/dev/null & + +.PHONY: clean clean_pdf distclean +clean: + @$(ECHO) "Cleaning build directory..." + @$(RM) $(AUX_DIR) + +clean_pdf: + @$(ECHO) "Cleaning generated PDF file..." 
+ @$(RM) $(PDF_DIR)/$(TARGET).pdf + @$(RMDIR) $(PDF_DIR) >&/dev/null || true + +distclean: clean clean_pdf diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/latexmkrc b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/latexmkrc new file mode 100644 index 00000000000..364f8350542 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/latexmkrc @@ -0,0 +1,2 @@ +$ENV{'TEXINPUTS'}=':source:source/images:source/images/exported:build:'; +$ENV{'BIBINPUTS'}=':source:'; diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/release/hpdcache_spec-1.0.0-draft.pdf b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/release/hpdcache_spec-1.0.0-draft.pdf new file mode 100644 index 00000000000..33de1b69438 Binary files /dev/null and b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/release/hpdcache_spec-1.0.0-draft.pdf differ diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec.bib b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec.bib new file mode 100644 index 00000000000..d6cfd8b1707 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec.bib @@ -0,0 +1,67 @@ +%% +%% Copyright 2023 CEA* +%% *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) +%% +%% SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +%% +%% Licensed under the Solderpad Hardware License v 2.1 (the “License”); you +%% may not use this file except in compliance with the License, or, at your +%% option, the Apache License version 2.0. You may obtain a copy of the +%% License at +%% +%% https://solderpad.org/licenses/SHL-2.1/ +%% +%% Unless required by applicable law or agreed to in writing, any work +%% distributed under the License is distributed on an “AS IS” BASIS, WITHOUT +%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +%% License for the specific language governing permissions and limitations +%% under the License. 
+%% +%% Author(s): Cesar Fuguet +%% Date: February, 2023 +%% Description: Bibliography of the specification document of the +%% HPDcache hardware IP +%% +@online{RISCV_spec, + title = {{The RISC-V Instruction Set Manual, Volume I: Unprivileged ISA}}, + author = {Waterman, Andrew and Asanovic, Krste}, + year = 2019, + url = {https://github.com/riscv/riscv-isa-manual/releases/download/Ratified-IMAFDQC/riscv-spec-20191213.pdf}, +} +@online{RISCV_privileged_spec, + title = {{The RISC-V Instruction Set Manual, Volume II: Privileged Architecture}}, + author = {Waterman, Andrew and Asanovic, Krste and Hauser, John}, + year = 2021, + url = {https://github.com/riscv/riscv-isa-manual/releases/download/Priv-v1.12/riscv-privileged-20211203.pdf}, +} + +@Techreport{arm_amba_2017, + author = {\mbox{Arm Limited}}, + year = "2020", + title = {{AMBA} {AXI} and {ACE} {Protocol} {Specification}}, + type = "Specification Document", + source = {https://developer.arm.com/documentation/ihi0022/hc/?lang=en}, + address = "110 Fulbourn Road, Cambridge, England", + pages = {440}, + language = {en}, + note = "", +} + +@INPROCEEDINGS{durand_vrp_2022, + author = {Durand, Yves and Guthmuller, Eric and Fuguet, Cesar and Fereyre, Jérôme and Bocco, Andrea and Alidori, Riccardo}, + booktitle = {2022 IEEE 29th Symposium on Computer Arithmetic (ARITH)}, + title = {Accelerating Variants of the Conjugate Gradient with the Variable Precision Processor}, + year = {2022}, + volume = {}, + number = {}, + pages = {51-57}, + doi = {10.1109/ARITH54963.2022.00017} +} + +@online{epac_website_2022, + title = {{European Processor Initiative (EPI) Accelerator Stream}}, + author = {}, + year = {2022}, + url = {https://www.european-processor-initiative.eu/accelerator/}, + lastaccessed = {February 23, 2023}, +} diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec.tex b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec.tex new file mode 100755 index 00000000000..5e5ae126f37 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec.tex @@ -0,0 +1,3404 @@ +%% +%% Copyright 2023 CEA* +%% *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) +%% +%% SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +%% +%% Licensed under the Solderpad Hardware License v 2.1 (the “License”); you +%% may not use this file except in compliance with the License, or, at your +%% option, the Apache License version 2.0. You may obtain a copy of the +%% License at +%% +%% https://solderpad.org/licenses/SHL-2.1/ +%% +%% Unless required by applicable law or agreed to in writing, any work +%% distributed under the License is distributed on an “AS IS” BASIS, WITHOUT +%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +%% License for the specific language governing permissions and limitations +%% under the License. 
+%% +%% Author(s): Cesar Fuguet +%% Date: February, 2023 +%% Description: Specification document of the HPDcache hardware IP +%% +\documentclass[10pt,titlepage,twoside]{book} +\newcommand{\doctitle}{High-Performance, Multi-Requester, Out-of-Order, L1 Dcache (HPDcache)} +\newcommand{\docauthor}{César Fuguet} +\newcommand{\docdate}{February, 2023} + +\input{hpdcache_spec_preamble} + +\title{\doctitle} +\author{\docauthor} + +\addbibresource{hpdcache_spec.bib} + +\begin{document} + +\dominitoc + +% +% front-matter +% +\pagestyle{plain} + +\begin{center} + + \mbox{}\vspace{3cm} + + {\huge\doctitle}\\ + + \vfill + + {\normalsize% + \begin{tabular}{p{.4\textwidth}l} + \toprule + Author: + & \mbox{\docauthor} \\ + \midrule + Release date: + & \mbox{\docdate} + \end{tabular}} + + \vfill + + This document version is~\docversion + + {\footnotesize\sffamily\mbox{}Copyright~\textcopyright~2023\\ + \mbox{\emph{Commissariat à l'Energie Atomique et aux Energies Alternatives (CEA)}}} +\end{center} + +\clearpage + +Contributors to all versions of this specification document:\\ +Cesar Fuguet, +Adrian Evans, +Tanuj-Kumar Khandelwal, +Nicolas Perbost. + +\clearpage +\tableofcontents +\listoftables +\listoffigures + +\pagestyle{fancystyle} + +% +% acronyms +% +\clearpage +\chapter*{Table of Acronyms} + +\begin{acronym}[YYYYYY] +\acro{CEA}{Commissariat à l'Energie Atomique et aux Energies Alternatives} +\acro{HPDcache}{High-Performance, Multi-Requester, Multi-Issue, Out-of-Order, L1 Dcache} +\acro{DMA}{Direct Memory Access} +\acro{GPP}{General Purpose Processor} +\acro{AMBA}{Advanced Microcontroller Bus Architecture} +\acro{AXI}{Advance eXtensible Interface} +\acro{NoC}{Network-on-Chip} +\acro{PoS}{Point-of-Serialization} +\acro{SLC}{System-Level Cache} +\acro{LSU}{Load-Store Unit} +\acro{MMU}{Memory Management Unit} +\acro{AMO}{Atomic Memory Operation} +\acro{CSR}{Configuration-and-Status Register} +\acro{CMO}{Cache Maintenance Operation} +\acro{HBM}{High-Bandwidth Memory} +\acro{MSHR}{Miss Status Holding Register} +\acro{RTAB}{Replay Table} +\acro{MCR}{Memory Consistency Rule} +\acro{RVWMO}{RISC-V Weak Memory Ordering} +\acro{WBUF}{Write Buffer} +\acro{ISA}{Instruction Set Architecture} +\acro{ASIC}{Application Specific Integrated Circuit} +\acro{FPGA}{Field-Programmable Gate Array} +\acro{SRAM}{Static Random-Access Memory} +\acro{RTL}{Register-Transfer Level} +\acro{LR}{Load-Reserved} +\acro{SC}{Store-Conditional} +\acro{OS}{Operating System} +\end{acronym} +\newpage + +\ifdefined\isdraft +\listoftodos +\newpage +\fi + + +% +% body +% +\input{hpdcache_spec_changelog} + +\chapter{Feature Specification}% +\chalabel{features} +\minitoc +\newpage + +This \acf{HPDcache} is the responsible for serving data accesses issued by a RISC-V core, tightly-coupled accelerators and hardware memory prefetchers. +All these "clients" are called requesters. + +The \ac{HPDcache} implements a hardware pipeline capable of serving one request per cycle. +An arbiter in the requesters' interface of the \ac{HPDcache} guarantees the correct behavior when there are multiple requesters. +This is illustrated in~\figref{dcache_req_arb}. + +\begin{figure}[htbp] + \includegraphics[width=\textwidth]{hpdcache_request_arbiter.pdf} + \caption{\figlabel{dcache_req_arb}High-Level View of the HPDcache Sub-System} +\end{figure} + +\newpage +\section{List of features} + +\begin{itemize}[itemsep=0em] +\item Support for multiple outstanding requests per requester. +\item Support for multiple outstanding read misses and writes to memory. 
+\item Processes one request per cycle. +\item Any given requester can access 1 to 32 bytes of a cacheline per cycle. +\item Reduced energy consumption by limiting the number of RAMs consulted per request. +\item Fixed priority arbiter between requesters: the requester port with the lowest index has the highest priority. +\item Non-allocate, write-through policy. +\item Hardware write-buffer to mask the latency of write acknowledgements from the memory system. +\item Internal, configurable, hardware, memory-prefetcher that supports up to 4 simultaneous prefetching streams. +\item Compliance with \ac{RVWMO}. + \begin{itemize} + \item For address-overlapping transactions, the cache guarantees that these are committed in the order in which they are consumed from the requesters. + \item For non-address-overlapping transactions, the cache may execute them in an out-of-order fashion to improve performance. + \end{itemize} +\item Support for \acp{CMO}: cache invalidation operations, and memory fences for multi-core synchronisation. Cache invalidation operations support the ones defined in the RISC-V CMO Standard. +\item Memory-mapped \acp{CSR} for runtime configuration of the cache, status and performance monitoring. +\item Ready-Valid, 8 channels (4 request/4 response), interface to the memory. This interface, cache memory interface (CMI), can be easily adapted to mainstream \ac{NoC} interfaces like \acs{AMBA} AXI~\cite{arm_amba_2017}. +\item An adapter for interfacing with AXI5 is provided. +\end{itemize} + +\newpage +\section{Synthesis-time (static) Configuration Parameters} + +The \ac{HPDcache} has several static configuration parameters. +These parameters must be defined at compilation/synthesis. + +\Tabref{dcache_parameters} summarizes the list of parameters that can be set when integrating the \ac{HPDcache}. +In \apxref{implementations}, we describe different systems where the \ac{HPDcache} was integrated, and we list the parameters used in those implementations. + +\begin{table}[h!] 
+\begin{center} +\caption{HPDcache synthesis-time parameters}% +\tablabel{dcache_parameters} +{\footnotesize% +\begin{tabular}{p{.4\textwidth}p{.6\textwidth}} +\toprule% +$\mathsf{CONF\_HPDCACHE\_PA\_WIDTH}$ &% +Physical address width (in bits) \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_SETS}$ &% +Number of sets \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_WAYS}$ &% +Number of ways (associativity) \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_WORD\_WIDTH}$ &% +Width (in bits) of a data word\\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_CL\_WORDS}$ &% +Number of words in a cacheline\\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_REQ\_WORDS}$ &% +Number of words in the data channels from/to requesters \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_REQ\_TRANS\_ID\_WIDTH}$ &% +Width (in bits) of the transaction ID from requesters \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_REQ\_SRC\_ID\_WIDTH}$ &% +Width (in bits) of the source ID from requesters \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_MSHR\_SETS}$ &% +Number of sets in the \ac{MSHR} \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_MSHR\_WAYS}$ &% +Number of ways (associativity) in the \ac{MSHR} \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_WBUF\_DIR\_ENTRIES}$ &% +Number of entries in the directory of the write buffer \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_WBUF\_DATA\_ENTRIES}$ &% +Number of entries in the data part of the write buffer \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_WBUF\_WORDS}$ &% +Number of data words per entry in the write buffer \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_WBUF\_TIMECNT\_WIDTH}$ &% +Width (in bits) of the time counter in write buffer entries \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_RTAB\_ENTRIES}$ &% +Number of entries in the replay table \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_MEM\_WORDS}$ &% +Number of words in the data channels from/to the memory interface \\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_MEM\_ID\_WIDTH}$ &% +Width (in bits) of the transaction ID from the memory interface \\ +\end{tabular}} +\end{center} +\end{table} + +Some parameters are not directly related with functionality (\tabref{dcache_physical_parameters}). +Instead, they allow adapting the \ac{HPDcache} to physical constraints in the target technology node. +Generally, these control the mapping to SRAM macros.. +Depending on the technology, some dimensions are a more efficient than others (in terms of area, power, and performance). +These also need to be provided by the user at synthesis-time. + +\begin{table}[h!] 
+\begin{center} +\caption{HPDcache synthesis-time physical parameters}% +\tablabel{dcache_physical_parameters} +{\footnotesize% +\begin{tabular}{p{.42\textwidth}p{.58\textwidth}} +\toprule% +$\mathsf{CONF\_HPDCACHE\_DATA\_WAYS\_PER\_RAM\_WORD}$ &% +Number of ways in the same CACHE data SRAM word\\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_DATA\_SETS\_PER\_RAM}$ &% +Number of sets per RAM macro in the DATA array of the cache\\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_DATA\_RAM\_WBYTEENABLE}$ &% +Use RAM macros with byte-enable instead of bit-mask for the CACHE data array\\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_ACCESS\_WORDS}$ &% +Number of words that can be accessed simultaneously from the CACHE data array\\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_MSHR\_WAYS\_PER\_RAM\_WORD}$ &% +Number of ways in the same MSHR SRAM word\\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_MSHR\_SETS\_PER\_RAM}$ &% +Number of sets per RAM macro in the MSHR array of the cache\\ +\midrule% +$\mathsf{CONF\_HPDCACHE\_MSHR\_RAM\_WBYTEENABLE}$ &% +Use RAM macros with byte-enable instead of bit-mask for the MSHR\\ +\tablabel{dcache_technology_dependent_parameters} +\end{tabular}} +\end{center} +\end{table} + +Several internal configuration values are computed from the above ones. +\Tabref{dcache_internal_parameters} has a non-complete list of these internal configuration values that may be mentioned in the remainder of this document. + +\begin{table}[h!] +\begin{center} +\caption{HPDcache internal parameters}% +\tablabel{dcache_internal_parameters} +{\footnotesize% +\begin{tabular}{p{.35\textwidth}p{.65\textwidth}} +\toprule% +\multirow{2}{*}{$\mathsf{HPDCACHE\_CL\_WIDTH}$}% +& Width (in bits) of a cacheline\\% +& $\mathsf{CONF\_HPDCACHE\_CL\_WORDS~\times~CONF\_HPDCACHE\_WORD\_WIDTH}$\\ +\midrule% +\multirow{2}{*}{$\mathsf{HPDCACHE\_NLINE\_WIDTH}$}% +& Width (in bits) of the CACHELINE index part of the address\\% +& $\mathsf{CONF\_HPDCACHE\_PA\_WIDTH - log_2(HPDCACHE\_CL\_WIDTH/8)}$\\ +\midrule% +\multirow{2}{*}{$\mathsf{HPDCACHE\_SET\_WIDTH}$}% +& Width (in bits) of the SET part of the address \\% +& $\mathsf{log_2(CONF\_HPDCACHE\_SETS)}$ \\ +\midrule% +\multirow{2}{*}{$\mathsf{HPDCACHE\_TAG\_WIDTH}$}% +& Width (in bits) of the TAG part of the address\\% +& $\mathsf{HPDCACHE\_NLINE\_WIDTH - HPDCACHE\_SET\_WIDTH}$\\ +\midrule% +\multirow{2}{*}{$\mathsf{HPDCACHE\_WBUF\_WIDTH}$}% +& Width (in bits) of an entry in the write-buffer\\% +& $\mathsf{CONF\_HPDCACHE\_WBUF\_WORDS~\times~CONF\_HPDCACHE\_WORD\_WIDTH}$\\ +\end{tabular}} +\end{center} +\end{table} + + +\chapter{Interfaces and Communication Protocols} +\minitoc +\newpage + +\section{Global Signals} +\seclabel{if_global_signals} + +\begin{table}[h!] +\caption{Global signals}% +\tablabel{if_global_signals} +{\footnotesize% +\begin{tabular}{p{.21\linewidth}p{.15\linewidth}p{.64\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{CLK\_I}$ + & Clock source + & Global clock signal. + The \ac{HPDcache} is synchronous to the rising-edge of the clock.\\ + \midrule + $\mathsf{RST\_NI}$ + & Reset source + & Global reset signal. + Asynchronous, active LOW, reset signal.\\ + \midrule + $\mathsf{WBUF\_FLUSH\_I}$ + & System + & Force the write-buffer to send all pending writes. + Active HIGH, one-cycle, pulse signal. + Synchronous to $\mathsf{CLK\_I}$.\\ + \midrule + $\mathsf{WBUF\_EMPTY\_O}$ + & System + & Indicates if the write-buffer is empty (there is no pending write transactions). 
+ When this signal is set to 1, the write-buffer is empty.\\ +\end{tabular}} +\end{table} + +\clearpage + +\section{Requesters' Request/Response Interface} + +This section describes the interfaces between the requesters and the \ac{HPDcache}. + +All these interfaces are synchronous to the rising edge of the global clock $\mathsf{CLK\_I}$ (\secref{if_global_signals}) + + +\subsection{Signal Descriptions} +\seclabel{if_requester_desc} + +\begin{table}[h!] +\caption{Request channel signals}% +\tablabel{if_req} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_VALID}$ + & Requester + & Indicates that the channel is signaling a valid request. + See \secref{if_valid_ready}.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_READY}$ + & Cache + & Indicates that the cache is ready to accept a request. + See \secref{if_valid_ready}.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_ADDR}$ + & Requester + & Target physical address of the request. + The address shall be aligned to the $\mathsf{HPDCACHE\_REQ\_SIZE}$ field. + See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_OP}$ + & Requester + & Indicates the type of operation to be performed. + See \secref{if_req_op}.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_WDATA}$ + & Requester + & Write data (little-endian). + It shall be naturally aligned to the address. + See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_BE}$ + & Requester + & Byte-enable for write data (little-endian). + It shall be naturally aligned to the address. + See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_SIZE}$ + & Requester + & Indicate the size of the access. + The size is encoded as the power-of-two of the number of bytes (e.g. 0 is $\mathsf{2^0~=~1}$, 5 is $\mathsf{2^5~=~32}$).\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_UNCACHEABLE}$ + & Requester + & Indicates whether the access needs to be cached (unset) or not (set). + Uncacheable accesses are directly forwarded to the memory. + See \secref{if_req_uncacheable}.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_SID}$ + & Requester + & The identification tag for the requester. + It shall be identical to the index of the request port binded to that requester. + See \secref{if_req_sid}.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_TID}$ + & Requester + & The identification tag for the request. + A requester can issue multiple requests. + The corresponding response from the cache will return this TID. + See \secref{if_req_tid}.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_NEED\_RSP}$ + & Requester + & The identification tag for the request. + Indicates whether the request needs a response from the cache. + When unset, the cache will not issue a response for the corresponding request. + See \secref{if_req_need_rsp}.\\ +\end{tabular}} +\end{table} + +\begin{table}[h!] +\caption{Response channel signals}% +\tablabel{if_rsp} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{HPDCACHE\_RSP\_VALID}$ + & Cache + & Indicates that the channel is signaling a valid response. + See \secref{if_valid_ready}.\\ + \midrule + $\mathsf{HPDCACHE\_RSP\_RDATA}$ + & Cache + & Response read data. + It shall be naturally aligned to the request address. 
+ See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{HPDCACHE\_RSP\_SID}$ + & Cache + & The identification tag for the requester. + It corresponds to the SID transferred with the request. + See \secref{if_req_sid}.\\ + \midrule + $\mathsf{HPDCACHE\_RSP\_TID}$ + & Cache + & The identification tag for the request. + It corresponds to the TID transferred with the request. + See \secref{if_req_tid}.\\ + \midrule + $\mathsf{HPDCACHE\_RSP\_ERROR}$ + & Cache + & Indicates whether there was an error condition while processing the request. + See \secref{if_rsp_error}.\\ +\end{tabular}} +\end{table} + +\clearpage +\section{Memory Request/Response Interfaces} +\seclabel{if_mem_desc} + +This section describes the interfaces between the \ac{HPDcache} and the \ac{NoC}/memory. + +All these interfaces are synchronous to the rising edge of the global clock $\mathsf{CLK\_I}$ (\secref{if_global_signals}) + +\subsection{Signal Descriptions} + +\begin{table}[h!] +\caption{Memory miss read request channel signals}% +\tablabel{if_mem_miss_req} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{MEM\_REQ\_MISS\_READ\_VALID}$ + & Cache + & Indicates that the channel is signaling a valid request.\\ + \midrule + $\mathsf{MEM\_REQ\_MISS\_READ\_READY}$ + & NoC + & Indicates that the \ac{NoC} is ready to accept a request.\\ + \midrule + $\mathsf{MEM\_REQ\_MISS\_READ\_ADDR}$ + & Cache + & Target physical address of the request. + The address shall be aligned to the $\mathsf{MEM\_REQ\_MISS\_READ\_SIZE}$ field. + See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{MEM\_REQ\_MISS\_READ\_LEN}$ + & Cache + & Indicates the number of transfers in a burst minus one. + \textbf{In this interface, for this version, this number is always 0 (one transfer)}. + \textbf{However, bigger values may be used in the future. Thus, it should be decoded}.\\ + \midrule + $\mathsf{MEM\_REQ\_MISS\_READ\_SIZE}$ + & Cache + & Indicate the size of the access. + The size is encoded as the power-of-two of the number of bytes. + \textbf{In the current design implementation, the size value is equal to $\mathbf{log_2(HPDCACHE\_CL\_WIDTH/8)}$}. + \textbf{However, smaller values may be used in the future. Thus, it should be decoded}.\\ + \midrule + $\mathsf{MEM\_REQ\_MISS\_READ\_ID}$ + & Cache + & The identification tag for the request\\ + \midrule + $\mathsf{MEM\_REQ\_MISS\_READ\_COMMAND}$ + & Cache + & Indicates the type of operation to be performed. + \textbf{This interface only issues READ operations}.\\ + \midrule + $\mathsf{MEM\_REQ\_MISS\_READ\_ATOMIC}$ + & Cache + & In case of atomic operations, it indicates its type. + \textbf{In this interface, this signal is not used, thus its value shall be ignored}.\\ + \midrule + $\mathsf{MEM\_REQ\_MISS\_READ\_CACHEABLE}$ + & Cache + & This is a hint for the cache hierarchy in the system. + It indicates if the request can be allocated by the cache hierarchy. + That is, data can be prefetched from memory or can be reused for multiple read transactions. + \textbf{This bit is always set in this interface}.\\ +\end{tabular}} +\end{table} + +\begin{table}[h!] 
+\caption{Memory miss read response channel signals}% +\tablabel{if_mem_miss_rsp} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{MEM\_RESP\_MISS\_READ\_VALID}$ + & NoC + & Indicates that the channel is signaling a valid response.\\ + \midrule + $\mathsf{MEM\_RESP\_MISS\_READ\_READY}$ + & Cache + & Indicates that the cache is ready to accept a response.\\ + \midrule + $\mathsf{MEM\_RESP\_MISS\_READ\_ERROR}$ + & NoC + & Indicates whether there was an error condition while processing the request.\\ + \midrule + $\mathsf{MEM\_RESP\_MISS\_READ\_ID}$ + & NoC + & The identification tag for the request. + It corresponds to the ID transferred with the request. + See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{MEM\_RESP\_MISS\_READ\_DATA}$ + & NoC + & Response read data. + It shall be naturally aligned to the request address. + See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{MEM\_RESP\_MISS\_READ\_LAST}$ + & NoC + & Indicates the last transfer in a read response burst.\\ +\end{tabular}} +\end{table} + +\clearpage +\begin{table}[h!] +\caption{Memory write-buffer write request channel signals}% +\tablabel{if_mem_wbuf_req} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_VALID}$ + & Cache + & Indicates that the channel is signaling a valid request.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_READY}$ + & NoC + & Indicates that the cache is ready to accept a response.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_ADDR}$ + & Cache + & Target physical address of the request. + The address shall be aligned to the $\mathsf{MEM\_REQ\_WBUF\_WRITE\_SIZE}$ field. + See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_LEN}$ + & Cache + & Indicates the number of transfers in a burst minus one. + \textbf{In this interface, this number is always 0 (one transfer)}. + \textbf{However, bigger values may be used in the future. + Thus, it should be decoded}.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_SIZE}$ + & Cache + & Indicate the size of the access. + The size is encoded as the power-of-two of the number of bytes. + \textbf{In this interface, the size shall be less or equal to $\mathbf{log_2(CONF\_HPDCACHE\_WBUF\_WORDS)}$}.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_ID}$ + & Cache + & The identification tag for the request.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_COMMAND}$ + & Cache + & Indicates the type of operation to be performed. + \textbf{In this interface, this signal is always a WRITE operation}.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_ATOMIC}$ + & Cache + & In case of atomic operations, it indicates its type. + \textbf{In this interface, this signal is not used, thus its value shall be ignored}.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_CACHEABLE}$ + & Cache + & This is a hint for the cache hierarchy in the system. + It indicates if the write is bufferable by the cache hierarchy. + This means that the write must be visible in a timely manner at the final destination. + However, write responses can be obtained from an intermediate point. + \textbf{This bit is always set in this interface}.\\ +\end{tabular}} +\end{table} + +\begin{table}[h!] 
+\caption{Memory write-buffer write data request channel signals}% +\tablabel{if_mem_wbuf_data} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_DATA\_VALID}$ + & Cache + & Indicates that the channel is transferring a valid data.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_DATA\_READY}$ + & NoC + & Indicates that the target is ready to accept the data.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_DATA\_WDATA}$ + & Cache + & Request write data. + It shall be naturally aligned to the request address. + See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_DATA\_BE}$ + & Cache + & Request write byte-enable. + It shall be naturally aligned to the request address. + See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{MEM\_REQ\_WBUF\_WRITE\_DATA\_LAST}$ + & Cache + & Indicates the last transfer in a write request burst.\\ +\end{tabular}} +\end{table} + +\begin{table}[h!] +\caption{Memory write-buffer write response channel signals}% +\tablabel{if_mem_wbuf_rsp} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{MEM\_RESP\_WBUF\_WRITE\_VALID}$ + & NoC + & Indicates that the channel is transferring a valid write acknowledgement.\\ + \midrule + $\mathsf{MEM\_RESP\_WBUF\_WRITE\_READY}$ + & Cache + & Indicates that the cache is ready to accept the acknowledgement.\\ + \midrule + $\mathsf{MEM\_RESP\_WBUF\_WRITE\_IS\_ATOMIC}$ + & NoC + & Indicates whether the atomic operation was successfully processed (atomically). + \textbf{The value in this signal is ignored in this interface}.\\ + \midrule + $\mathsf{MEM\_RESP\_WBUF\_WRITE\_ERROR}$ + & NoC + & Indicates whether there was an error condition while processing the request.\\ + \midrule + $\mathsf{MEM\_RESP\_WBUF\_WRITE\_ID}$ + & NoC + & The identification tag for the request. + It corresponds to the ID transferred with the request.\\ +\end{tabular}} +\end{table} + +\clearpage + +\begin{table}[h!] +\caption{Memory read uncached request channel signals} +\tablabel{if_mem_uc_read} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{MEM\_REQ\_UC\_READ\_VALID}$ + & Cache + & Indicates that the channel is signaling a valid request.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_READ\_READY}$ + & NoC + & Indicates that the \ac{NoC} is ready to accept a request.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_READ\_ADDR}$ + & Cache + & Target physical address of the request. + The address shall be aligned to the $\mathsf{MEM\_REQ\_MISS\_READ\_SIZE}$ field. + See \secref{if_addr_data_alignment}.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_READ\_LEN}$ + & Cache + & Indicates the number of transfers in a burst minus one. + \textbf{In this interface, this number is always 0 (one transfer)}.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_READ\_SIZE}$ + & Cache + & Indicate the size of the access. + The size is encoded as the power-of-two of the number of bytes.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_READ\_ID}$ + & Cache + & The identification tag for the request\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_READ\_COMMAND}$ + & Cache + & Indicates the type of operation to be performed. 
+ \textbf{In this interface, this signal is always a READ operation}.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_READ\_ATOMIC}$ + & Cache + & In case of atomic operations, it indicates its type. + \textbf{In this interface, this signal is not used, thus its value shall be ignored}.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_READ\_CACHEABLE}$ + & Cache + & This is a hint for the cache hierarchy in the system. + It indicates if the request can be allocated by the cache hierarchy. + That is, data can be prefetched from memory or can be reused for multiple read transactions. + \textbf{This bit is always unset in this interface. + Thus data shall come from the final destination}.\\ +\end{tabular}} +\end{table} + +\begin{table}[h!] +\caption{Memory read uncached response channel signals}% +\tablabel{if_mem_uc_read_rsp} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + & & Signals are identical that for the miss response channel signals. +\end{tabular}} +\end{table} + +\clearpage + +\begin{table}[h!] +\caption{Memory write uncached request channel signals}% +\tablabel{if_mem_uc_write} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{MEM\_REQ\_UC\_WRITE\_VALID}$ + & Cache + & Indicates that the channel is signaling a valid request.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_WRITE\_READY}$ + & NoC + & Indicates that the cache is ready to accept a response.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_WRITE\_ADDR}$ + & Cache + & Target physical address of the request. + The address shall be aligned to the $\mathsf{MEM\_REQ\_UC\_WRITE\_SIZE}$ field. + See \secref{if_addr_data_alignment}\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_WRITE\_LEN}$ + & Cache + & Indicates the number of transfers in a burst minus one. + \textbf{In the current \acs{HPDcache} implementation, this number is always 0 (one transfer)}.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_WRITE\_SIZE}$ + & Cache + & Indicate the size of the access. + The size is encoded as the power-of-two of the number of bytes.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_WRITE\_ID}$ + & Cache + & The identification tag for the request.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_WRITE\_COMMAND}$ + & Cache + & Indicates the type of operation to be performed. + \textbf{In this interface, this signal is either a WRITE or an ATOMIC operation}.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_WRITE\_ATOMIC}$ + & Cache + & In case of atomic operations, it indicates its type.\\ + \midrule + $\mathsf{MEM\_REQ\_UC\_WRITE\_CACHEABLE}$ + & Cache + & This is a hint for the cache hierarchy in the system. + It indicates if the write is bufferable by the cache hierarchy. + This means that the write must be visible in a timely manner at the final destination. + However, write responses can be obtained from an intermediate point. + \textbf{This bit is always unset in this interface (thus transactions are non-bufferable, and the response shall come from the final destination)}.\\ +\end{tabular}} +\end{table} + +\begin{table}[h!] +\caption{Memory write data uncached request channel signals}% +\tablabel{if_mem_uc_write_data} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + & & Signals are identical to those for the write data request channel signals. 
+\end{tabular}} +\end{table} + +\begin{table}[h!] +\caption{Memory write uncached response channel signals}% +\tablabel{if_mem_uc_write_rsp} +{\footnotesize% +\begin{tabular}{p{.38\linewidth}p{.07\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Source} + & \textbf{Description} \\ + \midrule + $\mathsf{MEM\_RESP\_UC\_WRITE\_VALID}$ + & NoC + & Indicates that the channel is transferring a valid write acknowledgement.\\ + \midrule + $\mathsf{MEM\_RESP\_UC\_WRITE\_READY}$ + & Cache + & Indicates that the cache is ready to accept the acknowledgement.\\ + \midrule + $\mathsf{MEM\_RESP\_UC\_WRITE\_IS\_ATOMIC}$ + & NoC + & Indicates whether the atomic operation was successfully processed (atomically).\\ + \midrule + $\mathsf{MEM\_RESP\_UC\_WRITE\_ERROR}$ + & NoC + & Indicates whether there was an error condition while processing the request.\\ + \midrule + $\mathsf{MEM\_RESP\_UC\_WRITE\_ID}$ + & NoC + & The identification tag for the request. + It corresponds to the ID transferred with the request.\\ +\end{tabular}} +\end{table} + +\section{Interfaces' requirements} + +This section describes the basic protocol transaction requirements for the different interfaces in the \ac{HPDcache}. + +\subsection{Valid/ready handshake process}% +\seclabel{if_valid_ready} + +All interfaces in the \ac{HPDcache} use a \textbf{VALID}/\textbf{READY} handshake process to transfer a payload between a source and a destination. +The payload contains the address, data and control information. + +As a reminder, the interfaces in the \ac{HPDcache} are the following: +\begin{itemize} + \item Requesters' request interface (\tabref{if_req}); + \item Requesters' response interface (\tabref{if_rsp}); + \item Memory miss read request interface (\tabref{if_mem_miss_req}); + \item Memory miss read response interface (\tabref{if_mem_miss_rsp}); + \item Memory write-buffer write request interface (\tabref{if_mem_wbuf_req}); + \item Memory write-buffer write data request interface (\tabref{if_mem_wbuf_data}); + \item Memory write-buffer write response interface (\tabref{if_mem_wbuf_rsp}); + \item Memory uncached read request interface (\tabref{if_mem_uc_read}); + \item Memory uncached read response interface (\tabref{if_mem_uc_read_rsp}); + \item Memory uncached write request interface (\tabref{if_mem_uc_write}); + \item Memory uncached write data request interface (\tabref{if_mem_uc_write_data}); + \item Memory uncached write response interface (\tabref{if_mem_uc_write_rsp}); +\end{itemize} + +The source sets to 1 the \textbf{VALID} signal to indicate when the payload is available. +The destination sets to 1 the \textbf{READY} signal to indicate that it can accept that payload. +Transfer occurs only when both the \textbf{VALID} and \textbf{READY} signals are set to 1 on the next rising edge of the clock. + +A source is not permitted to wait until \textbf{READY} is set to 1 before setting \textbf{VALID} to 1. + +A destination may or not wait for \textbf{VALID} to set the \textbf{READY} to 1 (\figref{valid_ready_scenarios} (a) \& (c)). In other words, a destination may set \textbf{READY} to 1 before an actual transfer is available (\figref{valid_ready_scenarios} (a)). + +When \textbf{VALID} is set to 1, the source must keep it that way until the handshake occurs. +This is, at the next rising edge when both \textbf{VALID} and \textbf{READY} (from the destination) are set to 1. In other words, a source cannot retire a pending \textbf{VALID} transfer (\figref{valid_ready_scenarios} (b)). 
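+
+As an illustration of this rule, the following sketch shows a source-side
+process that never retires a pending transfer: once \textbf{VALID} is set to 1,
+the payload and \textbf{VALID} are held until the rising edge of the clock
+where \textbf{READY} is also set to 1.
+The sketch is purely illustrative; the signal names are generic and do not
+correspond to the actual \ac{HPDcache} port list.
+
+\begin{verbatim}
+module source_sketch #(parameter int W = 32) (
+  input  logic         clk_i,
+  input  logic         rst_ni,
+  input  logic         payload_avail_i, // a new payload can be sent
+  input  logic [W-1:0] payload_i,
+  input  logic         ready_i,         // READY from the destination
+  output logic         valid_o,         // VALID to the destination
+  output logic [W-1:0] payload_o
+);
+  always_ff @(posedge clk_i or negedge rst_ni) begin
+    if (!rst_ni) begin
+      valid_o <= 1'b0;
+    end else if (!valid_o && payload_avail_i) begin
+      valid_o   <= 1'b1;      // assert VALID as soon as a payload is ready
+      payload_o <= payload_i; // the payload is kept stable while VALID is 1
+    end else if (valid_o && ready_i) begin
+      valid_o <= 1'b0;        // handshake completed: VALID may be deasserted
+    end
+  end
+endmodule
+\end{verbatim}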
+ +After an effective transfer (\textbf{VALID} and \textbf{READY} set to 1), the source may keep \textbf{VALID} set to 1 in the next cycle to signal a new transfer (with a new payload). +In the same manner, the destination may keep \textbf{READY} set to 1 if it can accept a new transfer. +This allows back-to-back transfers, with no idle cycles, between a source and a destination (\figref{valid_ready_scenarios} (d)). + +All interfaces are synchronous to the rising edge of the same global clock (\tabref{if_global_signals}). + +\begin{figure}[htbp] + {\centering + \begin{tabular}{cc} + \includegraphics[height=10em]{wave_ready_before_valid.pdf} + & \includegraphics[height=10em]{wave_valid_before_ready.pdf} \\ + \textbf{(a)} + & \textbf{(b)} \\ + \includegraphics[height=10em]{wave_ready_when_valid.pdf} + & \includegraphics[height=10em]{wave_back_to_back.pdf} \\ + \textbf{(c)} + & \textbf{(d)} \\ + \end{tabular} + \caption{\figlabel{valid_ready_scenarios}VALID/READY scenarios}} +\end{figure} + +\paragraph{Requesters' reponse interface}\mbox{} + +In the case of the requesters' response interfaces, there is a particularity. +For these interfaces, it is assumed that the \textbf{READY} signal is always set to 1. +That is why the \textbf{READY} signal is not actually implemented on those interfaces. +In other words, the requester must unconditionally accept the response, when it arrives. + + +\subsection{Address, data and byte enable alignment} +\seclabel{if_addr_data_alignment} + +\paragraph{Address alignment}\mbox{} + +In all request interfaces (Requesters' request interface, Memory miss read request interface, Memory write-buffer write request interface, Memory uncached read request interface, Memory uncached write request interface), the address transfered (\textbf{ADDR}) shall be byte-aligned to the value of the corresponding \textbf{SIZE} signal in that interface. + +Some examples are illustrated in \figref{req_addr_alignment}. +In the first case, the \textbf{SIZE} value is 2 (which corresponds to $2^2=4$ bytes). +Thus, the address must be a multiple of 4; +In the second case, \textbf{SIZE} value is 3. +Thus, the address must be a multiple of 8. +Finally, in the third case, \textbf{SIZE} value is 0. +Thus, there is no constraint on the address alignment. + +\paragraph{Data alignment}\mbox{} + +The data must be naturally aligned to the address (\textbf{ADDR}) and the maximum valid bytes of the transfer must be equal to $\mathsf{2^\mathbf{SIZE}}$. +This means that the first valid byte in the \textbf{DATA} signal must be at the indicated offset of the address. +Here, the offset corresponds to the least significant bits of the address, that allow to indicate a byte within the \textbf{DATA} word. +For example, if the \textbf{DATA} signal is 128 bits wide (16 bytes), then the offset corresponds to the first 4 bits of the \textbf{ADDR} signal. + +Some examples are illustrated in \figref{req_addr_alignment}. +As illustrated, within the data word, only bytes in the range from the indicated offset in the address, to that offset plus $\mathsf{2^\mathbf{SIZE}}$ can contain valid data. +Other bytes must be ignored by the destination. + +Additionally, within the range described above, the \textbf{BE} signal indicates which bytes within that range are actually valid. +Bytes in the \textbf{WDATA} signal where the \textbf{BE} signals are set to 0, must be ignored by the destination. 
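+
+As a purely illustrative example, consider a 128-bit (16-byte) \textbf{DATA}
+signal, a \textbf{SIZE} of 2 (that is, $\mathsf{2^2~=~4}$ bytes), and an
+address whose 4 least significant bits are equal to 4.
+The address is a multiple of 4 bytes, as required by the \textbf{SIZE} value.
+The byte offset within the data word is given by these 4 least significant
+bits, that is, offset 4.
+The valid bytes therefore occupy byte lanes 4 to 7 of \textbf{WDATA}, and only
+bits 4 to 7 of the 16-bit \textbf{BE} signal may be set (e.g. 0x00F0 when all
+4 bytes are written).
+All other byte lanes must be ignored by the destination, and all other
+\textbf{BE} bits must be set to 0.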
+ +\paragraph{Byte Enable (BE) alignment}\mbox{} + +The \textbf{BE} signal must be naturally aligned to the address (\textbf{ADDR}) and the number of bits set in this signal must be less or equal to $\mathsf{2^\mathbf{SIZE}}$. +This means that the first valid bit in the \textbf{BE} signal must be at the indicated offset of the address. +The offset is the same as the one explained above in the "Data alignment" paragraph. + +Some examples are illustrated in \figref{req_addr_alignment}. +As illustrated, within the \textbf{BE} word, only bits in the range from the indicated offset in the address, to that offset plus $\mathsf{2^\mathbf{SIZE}}$ can be set. +Other bits outside that range must be set to 0. + +\begin{figure}[tbp] + \centering + \includegraphics[width=\textwidth]{hpdcache_request_address_data_alignment.pdf} + \caption{\figlabel{req_addr_alignment}Address, Data and Byte Enable Alignment in Requests} +\end{figure} + + +\clearpage +\section{Requesters interface attributes}% +\seclabel{if_req_attr} + +\subsection{Type of operation}% +\seclabel{if_req_op} + +A requester indicates the required operation on the 4-bit, $\mathsf{HPDCACHE\_REQ\_OP}$ signal. +The supported operation are detailed in \tabref{dcache_req_op}. + +\begin{table}[h!] +\begin{center} +\caption{Request operation types}% +\tablabel{dcache_req_op} +{\footnotesize +\begin{tabular}{lll} + \toprule + \textbf{Mnemonic} + & \textbf{Encoding} + & \textbf{Type} \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_LOAD}$ + & 0b0000 + & Read operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_STORE}$ + & 0b0001 + & Write operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_LR}$ + & 0b0100 + & Atomic Load-reserved operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_SC}$ + & 0b0101 + & Atomic Store-conditional operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_SWAP}$ + & 0b0110 + & Atomic SWAP operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_ADD}$ + & 0b0111 + & Atomic integer ADD operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_AND}$ + & 0b1000 + & Atomic bitwise AND operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_OR}$ + & 0b1001 + & Atomic bitwise OR operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_XOR}$ + & 0b1010 + & Atomic bitwise XOR operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_MAX}$ + & 0b1011 + & Atomic integer signed MAX operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_MAXU}$ + & 0b1100 + & Atomic integer unsigned MAX operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_MIN}$ + & 0b1101 + & Atomic integer signed MIN operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_AMO\_MINU}$ + & 0b1110 + & Atomic integer unsigned MIN operation \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_CMO}$ + & 0b1111 + & \acf*{CMO} \\ +\end{tabular}} +\end{center} +\end{table} + +Load and store operations are normal read and write operations from/to the specified address. + +Atomic operations are the ones specified in the Atomic (A) extension of the \citetitle{RISCV_spec}\cite{RISCV_spec}. +More details on how this cache implements them are found in \charef{amo}. + +\acfp{CMO} are explained in \charef{cmo} + + +\subsection{Source identifier} +\seclabel{if_req_sid} + +Each request identifies its source through the $\mathsf{HPDCACHE\_REQ\_SID}$ signal. +The $\mathsf{HPDCACHE\_REQ\_SID}$ signal shall be decoded when the $\mathsf{HPDCACHE\_REQ\_VALID}$ signal is set to 1. + +The width of this signal is $\mathsf{CONF\_HPDCACHE\_REQ\_SRC\_ID\_WIDTH}$ (\tabref{dcache_parameters}) bits. 
+ +The \ac{HPDcache} reflects the value of the \textbf{SID} of the request into the corresponding \textbf{SID} of the response. + +Each port must have an unique ID that corresponds to its number. +Each port is numbered from $\mathsf{0}$ to $\mathsf{N-1}$. +Port number $\mathsf{N}$ is dedicated to the hardware memory prefetcher. +This number shall be constant for a given port (requester). + +The \ac{HPDcache} uses this information to route responses to the correct requester. + + +\subsection{Transaction identifier} +\seclabel{if_req_tid} + +Each request identifies transactions through the $\mathsf{HPDCACHE\_REQ\_TID}$ signal. +The $\mathsf{HPDCACHE\_REQ\_TID}$ signal shall be decoded when the $\mathsf{HPDCACHE\_REQ\_VALID}$ signal is set to 1. + +The width of this signal is $\mathsf{CONF\_HPDCACHE\_REQ\_TRANS\_ID\_WIDTH}$ bits (\tabref{dcache_parameters}). + +This signal can contain any value from $\mathsf{0}$ to $\mathsf{2^{CONF\_HPDCACHE\_REQ\_TRANS\_ID\_WIDTH} - 1}$. + +The \ac{HPDcache} forwards the value of the \textbf{TID} of the request into the \textbf{TID} of the corresponding response. + +A requester can issue multiple transactions without waiting for earlier transactions to complete. +Because the \ac{HPDcache} can respond to these transactions in a different order than that of requests, the requester can use the \textbf{TID} to match the responses with respect to requests. + +The ID of transactions is not necessarily unique. +A requester may reuse a given transaction ID for different transactions. +That is, even when some of these transactions are not yet completed. +In this case, when the requester starts multiple transactions with the same \textbf{TID}, the requester cannot match responses and requests. +As explained above, this is because the cache can respond out-of-order with respect to requests. + + +\subsection{Cacheability} +\seclabel{if_req_uncacheable} + +This cache considers that the memory space is segmented. +A segment corresponds to an address range: a base address and an end address. +Some segments are cacheable and others not. +The \ac{HPDcache} needs to know which segments are cacheable to determine if for a given read request, it needs to replicate read data into the cache. + +The request interface implements an uncacheable bit ($\mathsf{HPDCACHE\_REQ\_UNCACHEABLE}$). +When this bit is set, the access is considered uncacheable. +The $\mathsf{HPDCACHE\_REQ\_UNCACHEABLE}$ signal shall be decoded when the $\mathsf{HPDCACHE\_REQ\_VALID}$ signal is set to 1. + +\begin{tcolorbox}[colback=red!10!white, + colframe=white!10!red, + title=\textbf{Important}, + center, valign=top, halign=left, + center title, + width=.950\linewidth] +For a given address, the uncacheable attribute must be consistent between accesses. +The granularity is the cacheline. +In the event that the same address is accessed with different values in the uncacheable attribute, the behavior of the cache for that address is unpredictable. +\end{tcolorbox} + + +\subsection{Need response}% +\seclabel{if_req_need_rsp} + +For any given request, a requester can set to 0 the bit $\mathsf{HPDCACHE\_REQ\_NEED\_RSP}$ to indicate that it does not wish a response for that request. +The $\mathsf{HPDCACHE\_REQ\_NEED\_RSP}$ signal shall be decoded when the $\mathsf{HPDCACHE\_REQ\_VALID}$ signal is set to 1. + +When $\mathsf{HPDCACHE\_REQ\_NEED\_RSP}$ is set to 0, the \ac{HPDcache} processes the request but it does not send an acknowledge to the corresponding requester when the transaction is completed. 
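+As an illustration, the C-like pseudocode below (in the style of the listings later in this document) builds a store request for which no response is expected; the structure and field names (\lstinline{hpdcache_req_t}, \lstinline{need_rsp}, etc.) are illustrative only, the actual signals being those listed in the request interface tables.
+
+\begin{lstlisting}[language=c]
+// Illustrative "posted" store: the requester does not expect an
+// acknowledgement from the cache when the transaction completes.
+hpdcache_req_t req = {
+    .op          = HPDCACHE_REQ_STORE,
+    .addr        = buf_addr,    // byte-aligned to 2^size
+    .size        = 3,           // at most 2^3 = 8 valid bytes
+    .wdata       = wdata,
+    .be          = 0xff,        // all 8 bytes are valid
+    .sid         = MY_PORT_ID,  // constant, unique ID of this port
+    .tid         = 0,           // unused here: no response to match
+    .uncacheable = 0,
+    .need_rsp    = 0            // no response requested
+};
+\end{lstlisting}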
+ + +\subsection{Error response}% +\seclabel{if_rsp_error} + +The response interface contains a single-bit $\mathsf{HPDCACHE\_RSP\_ERROR}$ signal. +This signal is set to 1 by the \ac{HPDcache} when some error condition occurred during the processing of the corresponding request. +The $\mathsf{HPDCACHE\_RSP\_ERROR}$ signal shall be decoded when the $\mathsf{HPDCACHE\_RSP\_VALID}$ signal is set to 1. + +When the $\mathsf{HPDCACHE\_RSP\_ERROR}$ signal is set to 1 in the response, the effect of the corresponding request is undetermined. +In the case of \textbf{LOAD} or \textbf{AMOs} operations (see \secref{if_req_op}), the \textbf{RDATA} signal in the response does not contain any valid data. + + +\clearpage +\section{Memory interface attributes} + +\subsection{Type of operation}% +\seclabel{if_mem_req_op} + +\begin{table}[h!] +\begin{center} +\caption{Memory request operation types}% +\tablabel{dcache_mem_req_op} +{\footnotesize +\begin{tabular}{lll} + \toprule + \textbf{Mnemonic} + & \textbf{Encoding} + & \textbf{Type} \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_LOAD}$ + & 0b00 + & Read operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_STORE}$ + & 0b01 + & Write operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC}$ + & 0b10 + & Atomic operation \\ +\end{tabular}} +\end{center} +\end{table} + +Load and store operations are normal read and write operations from/to the specified address. + +In case of an atomic operation request ($\mathsf{HPDCACHE\_MEM\_ATOMIC}$), the specific operation is specified in the $\mathsf{MEM\_REQ\_ATOMIC}$ signal. + + +\subsubsection{Atomic operations on the memory interface} + +The supported atomic operations are listed in \tabref{dcache_mem_req_atop}. +These are transmitted in the $\mathsf{MEM\_REQ\_ATOMIC}$ signal. +Note that these operations are compatible with those in AXI. + +\begin{table}[h!] 
+\begin{center} +\caption{Memory request atomic operation types}% +\tablabel{dcache_mem_req_atop} +{\footnotesize +\begin{tabular}{lll} + \toprule + \textbf{Mnemonic} + & \textbf{Encoding} + & \textbf{Type} \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_ADD}$ + & 0b0000 + & Atomic fetch-and-add operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_CLR}$ + & 0b0001 + & Atomic fetch-and-clear operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_SET}$ + & 0b0010 + & Atomic fetch-and-set operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_EOR}$ + & 0b0011 + & Atomic fetch-and-exclusive-or operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_SMAX}$ + & 0b0100 + & Atomic fetch-and-maximum (signed) operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_SMIN}$ + & 0b0101 + & Atomic fetch-and-minimum (signed) operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_UMAX}$ + & 0b0110 + & Atomic fetch-and-maximum (unsigned) operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_UMIN}$ + & 0b0111 + & Atomic fetch-and-minimum (unsigned) operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_SWAP}$ + & 0b1000 + & Atomic swap operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_LDEX}$ + & 0b1100 + & Load-exclusive operation \\ + \midrule + $\mathsf{HPDCACHE\_MEM\_ATOMIC\_STEX}$ + & 0b1101 + & Store-exclusive operation \\ +\end{tabular}} +\end{center} +\end{table} + +\subsubsection{Operations used per interface} + +As a reminder, the \ac{HPDcache} implements multiple (four) request interfaces to the memory: + +\begin{itemize} + \item Memory miss read request interface (\tabref{if_mem_miss_req}); + \item Memory write-buffer (wbuf) write request interface (\tabref{if_mem_wbuf_req}); + \item Memory uncached read request interface (\tabref{if_mem_uc_read}); + \item Memory uncached write request interface (\tabref{if_mem_uc_write}); +\end{itemize} + +\Tabref{dcache_mem_req_supported_op} indicates the type of operations that each of these four request interfaces can issue. + +\begin{table}[!htbp] +\begin{center} +\caption{Supported operation types by request interfaces to the memory}% +\tablabel{dcache_mem_req_supported_op} +{\footnotesize +\begin{tabular}{ll} + \toprule + \textbf{Type} + & \textbf{Interfaces}\\ + \midrule + $\mathsf{MEM\_REQ\_LOAD}$ + & + \begin{tabular}{l} + - Memory miss read request;\\ + - Memory uncached read request. + \end{tabular} + \\ + \midrule + $\mathsf{MEM\_REQ\_STORE}$ + & + \begin{tabular}{l} + - Memory write-buffer write request;\\ + - Memory uncached write request. + \end{tabular}\\ + \midrule + $\mathsf{MEM\_REQ\_ATOMIC}$ + & + \begin{tabular}{l} + - Memory uncached write request. + \end{tabular}\\ +\end{tabular}} +\end{center} +\end{table} + + +\subsubsection{Responses for read-modify-write atomic operations on the memory interface} + +\begin{minipage}{\textwidth} +The requests listed below behave as a read-modify-write operations: +{\small\begin{itemize}[itemsep=0pt] + \item $\mathsf{HPDCACHE\_MEM\_ATOMIC\_ADD}$ + \item $\mathsf{HPDCACHE\_MEM\_ATOMIC\_CLR}$ + \item $\mathsf{HPDCACHE\_MEM\_ATOMIC\_SET}$ + \item $\mathsf{HPDCACHE\_MEM\_ATOMIC\_EOR}$ + \item $\mathsf{HPDCACHE\_MEM\_ATOMIC\_SMAX}$ + \item $\mathsf{HPDCACHE\_MEM\_ATOMIC\_SMIN}$ + \item $\mathsf{HPDCACHE\_MEM\_ATOMIC\_UMAX}$ + \item $\mathsf{HPDCACHE\_MEM\_ATOMIC\_UMIN}$ + \item $\mathsf{HPDCACHE\_MEM\_ATOMIC\_SWAP}$ +\end{itemize}} +\end{minipage} + +These requests are forwarded to the memory through the uncached write request interface (\tabref{if_mem_uc_write}). 
+A particularity of these requests is that they generate two responses from the memory:
+\begin{itemize}
+  \item The old data value from memory is returned through the memory uncached read response interface (\tabref{if_mem_uc_read_rsp}).
+  \item The write acknowledgement is returned through the memory uncached write response interface (\tabref{if_mem_uc_write_rsp}).
+\end{itemize}
+
+Both responses may arrive in any order at the initiating \ac{HPDcache}.
+
+Regarding errors, if either response has its ERROR signal set to 1 ($\mathsf{MEM\_RESP\_UC\_*\_ERROR}$), the \ac{HPDcache} considers that the operation was not completed.
+It waits for both responses and then forwards an error response ($\mathsf{HPDCACHE\_RSP\_ERROR}$ set to 1) to the corresponding requester on the \ac{HPDcache} requesters' side.
+
+
+\subsubsection{Responses for exclusive load and store operations on the memory interface}
+
+Exclusive load and store operations are issued as normal load and store operations on the memory uncached read request interface (\tabref{if_mem_uc_read}) and memory uncached write request interface (\tabref{if_mem_uc_write}), respectively.
+
+However, specific operation types are used for these exclusive requests:
+$\mathsf{HPDCACHE\_MEM\_ATOMIC\_LDEX}$ for loads, and $\mathsf{HPDCACHE\_MEM\_ATOMIC\_STEX}$ for stores.
+
+These requests behave similarly to normal loads and stores to the memory but provide some additional properties described in \charef{amo}.
+
+In the case of the $\mathsf{HPDCACHE\_MEM\_ATOMIC\_STEX}$ request, the write acknowledgement contains additional information in the $\mathsf{MEM\_RESP\_UC\_WRITE\_IS\_ATOMIC}$ signal.
+If this signal is set to 1, the exclusive store was "atomic": the data was actually written in memory.
+Otherwise, if this signal is set to 0, the exclusive store was "non-atomic": the write operation was aborted.
+
+The \ac{HPDcache} uses exclusive stores in case of \ac{SC} operations from requesters.
+Depending on the $\mathsf{MEM\_RESP\_UC\_WRITE\_IS\_ATOMIC}$ value, the \ac{HPDcache} responds to the requester according to the rules explained in \secref{sc_failure_code}.
+A "non-atomic" response is considered an "SC failure", and an "atomic" response is considered an "SC success".
+
+\clearpage
+\chapter{Architecture}
+\minitoc
+\newpage
+
+\Figref{dcache_subsystem} depicts a global view of the \ac{HPDcache}.
+The upper part shows the interface from/to the requesters.
+The bottom part shows the interface from/to the memory.
+
+\begin{figure}[htbp]
+  \includegraphics[width=\textwidth]{hpdcache_core.pdf}
+  \caption{\figlabel{dcache_subsystem}HPDcache core}
+\end{figure}
+
+\section{Cache Controller}
+
+The cache controller is responsible for decoding the requests and issuing them to the appropriate handler.
+The cache controller implements a 3-stage pipeline.
+This pipeline is capable of accepting one request per cycle.
+However, there are some scenarios where the pipeline may either stall or put a request on hold in a side buffer called the \acf{RTAB}.
+
+The first stage (stage 0) of the pipeline arbitrates between requests from the miss handler (refill), the \ac{RTAB}, and the requesters; the second stage (stage 1) responds to loads (in case of hit) and to stores; the third stage (stage 2) is only used by loads in case of miss.
+In this last stage, the cache allocates a new entry in the \ac{MSHR}.
+
+A request in stage 0 is either consumed on that cycle (forwarded to stage 1 or to the \ac{RTAB}), or waits when the pipeline is stalled.
+A request in stage 1 or stage 2 always advances.
+In stage 1, the request is either acknowledged (load hit or write acknowledgement), forwarded to stage 2 (load miss), or put into the \ac{RTAB}.
+
+\paragraph{Pipeline stalls in stage 0}\mbox{}
+
+Stalls in stage 0 are necessary in some specific scenarios, which are listed below.
+When there is a stall in stage 0, a new request from a requester cannot be accepted; that is, the corresponding $\mathsf{READY}$ signal is kept low (see \secref{if_valid_ready}).
+Requests in the other stages (1 and 2) are processed normally (even in case of a stall in stage 0).
+
+\begin{description}
+\item[Event 1:] The \ac{RTAB} is full;
+\item[Event 2:] A \ac{CMO} invalidation or fence operation is being processed by the corresponding handler;
+\item[Event 3:] An uncacheable or atomic operation is being processed by the corresponding handler;
+\item[Event 4:] There is a load miss in stage 1;
+\item[Event 5:] There is a store in stage 1 and the request in stage 0 is a load (structural hazard on access to the internal cache data memory).
+\end{description}
+
+The number of clock cycles of the stall in stage 0 depends on the type of event:
+\begin{itemize}
+\item \textbf{Events~4~\&~5}: the number of clock cycles is always one.
+\item \textbf{Events~1,~2~\&~3}: the number of clock cycles is variable:
+  \begin{itemize}
+  \item \textbf{Event~1}: it depends on when an entry of the \ac{RTAB} is freed.
+  \item \textbf{Events~2~\&~3}: it depends on the latency of the corresponding operation.
+  \end{itemize}
+\end{itemize}
+
+
+\subsection{On-Hold Requests}
+\seclabel{onhold}
+
+In some scenarios, a request that has been accepted in the pipeline can later be put on-hold by the cache controller.
+When a request is put on-hold, it is re-executed when all the blocking conditions have been removed.
+The blocking conditions putting a request on-hold are the following:
+
+\begin{description}
+  \item[Case 1:] \textbf{Cacheable LOAD or PREFETCH, and there is a hit on a pending miss (hit on the \acs{MSHR})}
+
+When there is a read miss on an address (cacheline granularity) for which there is already a pending read miss, the more recent one needs to wait for the previous one to be served.
+This allows the more recent one to read the data from the cache after the refill operation completes.
+
+  \item[Case 2:] \textbf{Cacheable LOAD or PREFETCH, there is a miss on the cache, and there is a hit (cacheline granularity) on an open, closed or sent entry of the \ac{WBUF}}
+
+When there is a read miss on an address, the cache controller needs to read the missing cacheline from the memory.
+As the \ac{NoC} implements different physical channels for read and write requests, there is a race condition between the read miss and a pending write operation.
+If the read miss arrives at the memory first, it would read the old data (which violates the data consistency rules in \secref{rtab_mcr}).
+This blocking condition means that the LOAD or PREFETCH incurs a delay penalty of up to two transaction delays: one for the write to complete, then one for the read.
+
+  \item[Case 3:] \textbf{Cacheable STORE, there is a miss on the cache, and there is a hit on a pending miss (hit on the \acs{MSHR})}
+
+When writing, as the \ac{NoC} implements different physical channels for read and write requests, there is a race condition between the STORE and the pending read miss.
+If the STORE arrives at the memory first, the earlier read miss would read the new data (which violates the data consistency rules in~\secref{rtab_mcr}).
+
+  \item[Case 4:] \textbf{Cacheable STORE, and there is a hit on a closed entry of the \ac{WBUF}, or the \ac{WBUF} is full}
+
+Writes to the same address need to be sent in order (to respect the data consistency rules).
+A closed entry in the \ac{WBUF} is waiting to be sent to the memory.
+Until it is sent, the cache cannot open a new entry in the \ac{WBUF} for the same address, because the two entries could be sent in an arbitrary order.
+
+  \item[Case 5:] \textbf{Cacheable LOAD/PREFETCH/STORE, and there is a hit on an entry of the \ac{RTAB}}
+
+Accesses to the same address (at cacheline granularity) MUST be processed in order (to respect the data consistency rules).
+In case of a hit with a valid entry in the \ac{RTAB}, the new request is written into the corresponding linked list of the \ac{RTAB}.
+
+  \item[Case 6:] \textbf{Cacheable LOAD or PREFETCH, there is a miss on the cache, and the \acs{MSHR} has no available slots}
+
+When there is a read miss on an address, the cache controller needs to allocate a new entry in the \acs{MSHR}.
+The \acs{MSHR} is a set-associative memory.
+If there is no available WAY to store the new read miss request, this request needs to wait for an entry of the corresponding SET in the \ac{MSHR} to be freed.
+This happens when a refill operation completes for a cacheline with the same \acs{MSHR} SET index.
+
+  \item[Case 7:] \textbf{Cacheable LOAD or PREFETCH, there is a miss on the cache, and the miss handler FSM cannot send the read miss request}
+
+When there is a read miss on an address, the cache controller needs to read the missing cacheline from memory.
+The read miss request is sent by the miss handler FSM, but if there is congestion in the \ac{NoC}, this read request cannot be issued.
+To avoid blocking the pipeline and creating a deadlock, the request is put on-hold.
+
+\end{description}
+
+All these conditions, except for case 5, are checked in the second stage (stage 1) of the pipeline.
+Case 5 is checked in the first stage (stage 0) of the pipeline.
+If one of the conditions is met, the request is put into the \ac{RTAB}.
+It is kept on-hold until its blocking condition is resolved.
+At that moment, the request can be replayed from the \ac{RTAB} into the pipeline, starting from stage 0.
+
+The \ac{RTAB} can store multiple on-hold requests.
+The idea is to improve the throughput of the cache by reducing the number of cases where there is head-of-line blocking at the requesters' interface.
+
+When a request cannot be processed right away because it depends on the completion of a previous one, the request is stored in the replay table.
+This allows new requests to arrive at the data cache and to be potentially executed (in an out-of-order fashion).
+To prevent a deadlock, if the \ac{RTAB} is full, the \ac{HPDcache} does not accept new requests.
+
+The ready requests in the \ac{RTAB} have higher priority than new requests.
+They are executed as soon as possible, that is, as soon as their dependencies are resolved.
+
+To execute a request from the \ac{RTAB}, the cache controller complies with the rules defined in~\secref{rtab_mcr}.
+
+\subsection{\acfp*{MCR}}%
+\seclabel{rtab_mcr}
+
+When multiple requests are put on-hold in the \ac{RTAB}, the cache controller may issue them (once they are ready) in a different order than the order in which they arrived (program order).
+However, the cache controller needs to respect certain rules, here called \aclp{MCR}, to allow the requesters to have a predictable behavior. + +The set of rules followed by the cache controller are those defined by the \ac{RVWMO} memory consistency model~\cite{RISCV_spec}. +A brief statement summarizing these rules is the following: \textbf{if one memory access (read or write), A, precedes another memory access (read or write), B, and they access overlapping addresses, then they MUST be executed in program order (A then B)}. +It can be deduced from this statement, that non-overlapping accesses can be executed in any order. + +Of course, the cache controller also needs to respect the progress axiom: \textbf{"no memory operation may be preceded by an infinite number of memory operations"}. +That is, all memory operations need to be processed at some point in time, thus cannot wait indefinitely. + + +\section{Miss Handler} + +This block is in charge of handling read miss requests to the memory. +It has three parts: +\begin{enumerate} +\item The first part is in charge of forwarding read miss requests to the memory; +\item The second part is in charge of tracking the status of in-flight read misses; +\item The third part is in charge of writing into the cache the response data from the memory, and update the cache directory accordingly. +\end{enumerate} + +\Todo{Add FIFO buffer for storing miss requests to the MISS HANDLER. +This allows to reduce the number of requests put on-hold when the read request \ac{NoC} is congested.} + + +\subsection{Multiple-entry \acf*{MSHR}} +\seclabel{mshr} + +The second part (tracking) of the miss handler contains an essential component of the \ac{HPDcache}: the set-associative multi-entry \ac{MSHR}. +Each entry of this component contains the status for each in-flight read miss request to the memory. +Therefore, the number of entries in the \ac{MSHR} defines the maximum number of in-flight read miss requests. + +The number of entries in the \ac{MSHR} depends on two configuration values: $\mathsf{CONF\_HPDCACHE\_MSHR\_WAYS}$ and $\mathsf{CONF\_HPDCACHE\_MSHR\_SETS}$. +The number of entries is computed as: +\begin{equation*} +\mathsf{HPDCACHE\_MSHR\_SETS~\times~CONF\_HPDCACHE\_MSHR\_WAYS} +\end{equation*} + +As for any set-associative array: + +\begin{tabular}{p{\linewidth}} + \toprule + When $\mathsf{CONF\_HPDCACHE\_MSHR\_SETS = 1}~\text{and}~\mathsf{CONF\_HPDCACHE\_MSHR\_WAYS > 1}$\\ + $\rightarrow$~The \ac{MSHR} behaves as a fully-associative access array.\\ + \midrule + When $\mathsf{CONF\_HPDCACHE\_MSHR\_SETS > 1}~\text{and}~\mathsf{CONF\_HPDCACHE\_MSHR\_WAYS = 1}$\\ + $\rightarrow$~The \ac{MSHR} behaves as a direct access array.\\ + \midrule + When $\mathsf{CONF\_HPDCACHE\_MSHR\_SETS > 1}~\text{and}~\mathsf{CONF\_HPDCACHE\_MSHR\_WAYS > 1}$\\ + $\rightarrow$~The \ac{MSHR} behaves as a set-associative access array\\ +\end{tabular} + +A high number of entries in the \ac{MSHR} allows to overlap multiple accesses to the memory, and hides its latency. +Of course, the more entries there are, the more area the \ac{MSHR} consumes. +Therefore, the system architect must choose \ac{MSHR} parameters depending on a combination of memory latency, memory throughput, required area and performance, and the capability of requesters to issue multiple read transactions. 
+ +\begin{tcolorbox}[colback=red!10!white, + colframe=white!10!red, + title=\textbf{Important}, + center, valign=top, halign=left, + center title, + width=.950\linewidth] +Regarding the last condition, regardless whether the requesters can issue multiple read requests, the hardware memory prefetcher exploits having multiple in-flight read miss requests. +\end{tcolorbox} + +An entry in the \ac{MSHR} contains the following information: + +\begin{center} +\begin{tabular}{lccccc} +\toprule% +{\bf Bits} &% +T &% +R &% +S &% +W &% +1 \\ +\midrule +{\bf Description} &% +MSHR Tag &% +Request ID &% +Source ID &% +Word Index &% +Need Response\\ +\end{tabular} + +\begin{tabular}{ll} +\toprule% +{\bf Field} &% +{\bf Width} \\ +\midrule% +MSHR tag (T) &% +$\mathsf{T = HPDCACHE\_NLINE\_WIDTH - log_2(CONF\_HPDCACHE\_MSHR\_SETS)}$ \\ +\midrule% +Request ID (R) &% +$\mathsf{R = CONF\_HPDCACHE\_REQ\_TRANS\_ID\_WIDTH}$ \\ +\midrule% +Source ID (S) &% +$\mathsf{S = CONF\_HPDCACHE\_REQ\_SRC\_ID\_WIDTH}$ \\ +\midrule% +Word Index (W) &% +$\mathsf{W = log_2(CONF\_HPDCACHE\_CL\_WORDS})$ \\ +\end{tabular} +\end{center} + +\subsubsection{\acs*{MSHR} implementation} +\seclabel{mshr_implementation} + +In order to limit the area cost of the \ac{MSHR}, it can be implemented using SRAM macros. +The depth of the macros is $\mathsf{CONF\_HPDCACHE\_MSHR\_SETS\_PER\_RAM}$. +Multiple ways, for the same set, can be put side-by-side in the same SRAM word ($\mathsf{CONF\_HPDCACHE\_MSHR\_WAYS\_PER\_RAM\_WORD}$), therefore the width is a multiple of $\mathsf{HPDCACHE\_MSHR\_ENTRY = T + R + S + W + 1}$ bits. +The total number of SRAM macros is: + +\begin{equation*} +\begin{split} +\mathsf{(CONF\_HPDCACHE\_MSHR\_WAYS/CONF\_HPDCACHE\_MSHR\_WAYS\_PER\_RAM\_WORD)\times} \\ +\mathsf{\lceil{}CONF\_HPDCACHE\_MSHR\_SETS/CONF\_HPDCACHE\_MSHR\_SETS\_PER\_RAM{}\rceil} +\end{split} +\end{equation*} + +SRAM macros shall be selected depending on the required number of entries, and the target technology node. Additional information about \ac{MSHR} SRAM macros can be found in \apxref{ram_macros}. +When the number of entries is low (e.g. sets times ways are less than 16), it is generally better to implement the \ac{MSHR} using flip-flops. + +This makes \ac{MSHR} fully-associative and thus removes associativity conflicts. + + +\section{Uncacheable Handler} + +This block is responsible for handling uncacheable (see \secref{if_req_uncacheable}) load and store requests, as well as atomic requests (regardless of whether they are cacheable or not). +For more information about atomic requests see \charef{amo}. + +All requests handled by this block produce a request to the memory. +This request to the memory is issued through the memory uncached interfaces. +Uncacheable read requests are forwarded to the memory through the memory read uncached interface (\tabref{if_mem_uc_read}); +and uncacheable write requests or atomic requests are forwarded through the memory write uncached interface (\tabref{if_mem_uc_write}). + + +\section{\acf{CMO} Handler} + +This block is responsible for handling \acp{CMO}. +\acp{CMO} are special requests from requesters that address the cache itself, and not the memory nor a peripheral. +These operations allow to either invalidate designated cachelines in the cache, or produce explicit memory read and write fences. + +The complete list of supported \acp{CMO} is detailed in \charef{cmo}. 
+ +\section{Cache Directory and Data} + +\subsection{RAM Organization} +\seclabel{dir_data_ram_implementation} + +The \ac{HPDcache} cache uses RAM macros for the directory and data parts of the cache. +These RAM macros are synchronous, read/write, single-port RAMs. +Additional information about RAM macros in the cache can be found in \apxref{ram_macros}. + +The organization of the RAMs, for the directory and the data, targets the following: +\begin{enumerate}[itemsep=1em] +\item {\bf High memory bandwidth to/from the requesters} + +To improve performance, the organization allows to read one data word (1, 2, 4, 8, 16 or 32 bytes) per cycle, with a latency of one cycle. + +\item {\bf Low energy-consumption} + +To limit the energy-consumption, the RAMs are organized in a way that the cache enables only a limited number of RAM macros. +This number depends on the number of requested bytes, and it also depends on the target technology. +Depending on the target technology, the RAM macros have different trade-offs between width, depth and timing (performance). + +\item {\bf Small RAM footprint} + +To limit the footprint of RAMs, the selected organization implements a small number of RAMs macros. +The macros are selected in a way that they are as deep and as wide as possible. +The selected ratios (depth x width) depend on the target technology as explained above. + +\end{enumerate} + +\subsection{Example cache data/directory RAM organization} + +\Figref{dcache_ram_organization} illustrates an example organization of the RAMs. +The illustrated organization allows to implement 32 KB of data cache (128 sets, 4 ways, and 64 bytes lines). +This example organization has a refilling latency of two cycles because the cache needs to write two different entries on a given memory cut. + +\begin{figure}[htbp] + \includegraphics[width=\textwidth]{hpdcache_data_ram_organization.pdf} + \caption{\figlabel{dcache_ram_organization}Data Cache Micro-Architecture} +\end{figure} + +The example RAM organization in \figref{dcache_ram_organization} allows to access from 1 to 32 bytes of a given cacheline per cycle. + +The energy consumption is dependent on the length of the access. Accesses from 1 to 8 bytes need to read two memory cuts (one containing ways 0 and 1, and the other containing ways 2 and 3); accesses from 8 to 16 bytes need to read 4 memory cuts; and so on. For reading 24 to 32 bytes, the cache needs to access all the cuts at the same time (8 cuts). + +\section{\acf*{RTAB}} + +The \ac{RTAB} is implemented as an array of linked lists. +It is a fully-associative multi-entry buffer, where each valid entry, belongs to a linked list. +It is implemented in flip-flops. +The linked lists contain a list of requests that target the same cacheline. +There can be multiple linked lists, but each shall target a different cacheline. +The head of each linked list contains the oldest request while the tail contains the newest request. +The requests are processed from the head to the tail in order to respect the \acp{MCR} explained in section \secref{rtab_mcr}. + +Regarding the pop operation (extracting a ready request from the replay table), it is possible that once the request is replayed, some of the resources it needs are again busy. +Therefore, the request needs to be put on-hold again. +In this case, the request needs to keep its position as head of the linked list. +This is to preserve the program order. +For this reason, the pop operation is implemented as a two-step operation: pop then commit, or pop then rollback. 
+The commit operation actually removes the request from the list, while the rollback undoes the pop.
+
+An entry of the \ac{RTAB} has the following structure (LL means Linked List):
+
+\begin{center}
+\begin{tabular}{p{.15\linewidth}p{.10\linewidth}p{.10\linewidth}p{.10\linewidth}p{.10\linewidth}p{.07\linewidth}}
+  \toprule%
+  Request \mbox{($\approx$200 bits)} & \mbox{LL tail} \mbox{(1 bit)} & \mbox{LL head} \mbox{(1 bit)} & \mbox{LL next} \mbox{(2-3 bits)} & Deps \mbox{(5 bits)} & Valid \mbox{(1 bit)} \\
+  \midrule%
+\end{tabular}
+\end{center}
+
+\begin{itemize}
+\item Request: contains the on-hold request from the core (data + meta-data).
+\item LL tail: indicates if the entry is the tail of a linked list.
+\item LL head: indicates if the entry is the head of a linked list.
+\item LL next: designates the next (older) request in the linked list.
+\item Deps bits: indicate the kind of dependency that keeps the request on-hold.
+\item Valid: indicates if the entry contains valid information (if unset, the entry is free).
+\end{itemize}
+
+The following table briefly describes the possible dependencies between memory requests.
+For each kind of dependency, there is a corresponding bit in the "deps bits" field of \ac{RTAB} entries.
+
+\begin{tabular}{p{.30\linewidth}p{.70\linewidth}}
+  \toprule%
+  \bf Dependency
+  & \bf Description \\
+  \midrule%
+  MSHR\_hit
+  & There is an outstanding miss request on the target address \\
+  \midrule%
+  MSHR\_full
+  & The MSHR is full \\
+  \midrule%
+  MISS\_handler\_busy
+  & The MISS HANDLER is busy and cannot send a new miss request \\
+  \midrule%
+  WBUF\_hit
+  & There is a match with an open, closed, or sent entry in the write buffer \\
+  \midrule%
+  WBUF\_not\_ready
+  & There is a match with a closed entry in the write buffer or the write-buffer is full\\
+\end{tabular}
+
+\paragraph{\ac{RTAB} operations}\mbox{}
+
+The \ac{RTAB} implements the following operations:
+
+\begin{tabular}{p{.30\linewidth}p{.70\linewidth}}
+  \toprule
+  \textbf{Operation} & \textbf{Description} \\
+  \midrule
+  $\mathsf{rtab\_alloc()}$ & Allocate a new linked list \\
+  \midrule
+  $\mathsf{rtab\_alloc\_and\_link()}$ & Allocate a new entry and link it to an existing linked list \\
+  \midrule
+  $\mathsf{rtab\_pop\_try()}$ & Get a ready request from one of the linked lists (without actually removing it from the list) \\
+  \midrule
+  $\mathsf{rtab\_pop\_commit()}$ & Actually remove a popped request from the list \\
+  \midrule
+  $\mathsf{rtab\_pop\_rollback()}$ & Rollback a previously popped request (with a possible update of its dependencies) \\
+  \midrule
+  $\mathsf{rtab\_find\_ready()}$ & Find a ready request among the heads of valid linked lists \\
+  \midrule
+  $\mathsf{rtab\_find\_empty()}$ & Find an empty entry \\
+  \midrule
+  $\mathsf{rtab\_empty()}$ & Is the RTAB empty? \\
+  \midrule
+  $\mathsf{rtab\_full()}$ & Is the RTAB full? \\
+  \midrule
+  $\mathsf{update\_deps()}$ & Update the dependency bits of valid requests \\
+\end{tabular}
+
+\begin{lstlisting}[language=c]
+int rtab_alloc(req_t r, deps_t d)
+{
+    int index = rtab_find_empty_entry();
+    rtab[index] = {
+        valid   : 1,
+        deps    : d,
+        ll_head : 1,
+        ll_tail : 1,
+        ll_next : 0,
+        request : r
+    };
+    return index;
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+int rtab_alloc_and_link(req_t r, int n)
+{
+    int index = rtab_find_empty_entry();
+
+    // replace the tail of the linked list
+    rtab[n].ll_tail = 0;
+
+    // add the new request as the tail of the linked list
+    rtab[index] = {
+        valid   : 1,
+        deps    : 0,
+        ll_head : 0,
+        ll_tail : 1,
+        ll_next : n,
+        request : r
+    };
+
+    return index;
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+req_t rtab_pop_try(int *index)
+{
+    *index = rtab_find_ready_entry();
+
+    // Temporarily unset the head bit. This prevents the
+    // request from being rescheduled.
+    rtab[*index].ll_head = 0;
+
+    return rtab[*index].request;
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+void rtab_pop_commit(int index)
+{
+    // Change the head of the popped linked list
+    // (look for a valid entry whose next field
+    //  points to the popped entry)
+    for (int i = 0; i < RTAB_NENTRIES; i++) {
+        if (rtab[i].valid && (i != index) && (rtab[i].ll_next == index)) {
+            rtab[i].ll_head = 1;
+        }
+    }
+
+    rtab[index].valid = 0;
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+void rtab_pop_rollback(int index, bitvector deps)
+{
+    rtab[index].ll_head = 1;
+    rtab[index].deps    = deps;
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+int rtab_find_ready_entry(int last)
+{
+    // choose a ready entry using a round-robin policy
+    int i = (last + 1) % RTAB_NENTRIES;
+    for (;;) {
+        // ready entry found
+        if (rtab[i].valid && rtab[i].ll_head && (rtab[i].deps == 0))
+            return i;
+
+        // there is no ready entry
+        if (i == last)
+            return -1;
+
+        i = (i + 1) % RTAB_NENTRIES;
+    }
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+int rtab_find_empty_entry()
+{
+    for (int i = 0; i < RTAB_NENTRIES; i++)
+        if (!rtab[i].valid)
+            return i;
+
+    return -1;
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+bool rtab_is_full()
+{
+    return (rtab_find_empty_entry() == -1);
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+int rtab_is_empty()
+{
+    for (int i = 0; i < RTAB_NENTRIES; i++)
+        if (rtab[i].valid)
+            return 0;
+
+    return 1;
+}
+\end{lstlisting}
+
+\subsection{\ac{RTAB} integration in the cache}
+
+The data cache has a 3-stage pipeline.
+The \ac{RTAB} is used in stages 0 and 1 (st0 and st1).
+The following table summarizes the actions performed on the \ac{RTAB}: + +{\footnotesize% +\begin{tabular}{% +p{.12\linewidth}p{.17\linewidth}p{.11\linewidth}p{.11\linewidth}p{.11\linewidth}p{.11\linewidth}p{.11\linewidth}p{.11\linewidth}} + \toprule% + {\bf \mbox{New Request}} + & {\bf Match @ in \ac{RTAB}} + & {\bf Match @ in \ac{MSHR}} + & {\bf Match @ in \ac{WBUF}} + & {\bf Cache Miss AND \ac{MSHR} is full} + & {\bf Cache Miss AND Miss Handler is not ready} + & {\bf \ac{WBUF} is full} \\ + \midrule% + LOAD + & $\mathsf{alloc\_and\_link}$ (st0) + & $\mathsf{alloc\_new}$ (st1) + & $\mathsf{alloc\_new}$ (st1) + & $\mathsf{alloc\_new}$ (st1) + & $\mathsf{alloc\_new}$ (st1) + & $\phi$ \\ + \midrule% + STORE + & $\mathsf{alloc\_and\_link}$ (st0) + & $\mathsf{alloc\_new}$ (st1) + & $\mathsf{alloc\_new}$ (st1) (if $\mathsf{wbuf\_entry}$ is closed) + & $\phi$ + & $\phi$ + & $\mathsf{alloc\_new}$ (st1) \\ + \midrule% +\end{tabular}} + +\subsection{Policy for taking new requests in the data cache} + +With the \ac{RTAB}, the cache has three possible sources of requests: +\begin{enumerate} +\item Requesters (new requests); +\item the \ac{RTAB} (on-hold requests); +\item the miss handler (refill requests). +\end{enumerate} + +The policy to choose the request is as follows: + +\begin{lstlisting} +rtab_req = rtab_find_ready_entry(); +if (rtab_is_full()) { + new_req = rtab_req; +} else { + new_req = (rtab_req != -1) ? rtab_req : core_req; +} + +accepted_req = round_robin(new_req, refill_req); +\end{lstlisting} + +To summarize: \ac{RTAB} ready requests have higher priority than core requests (this is to flush the pipeline as fast as possible). +However, if the \ac{RTAB} is full, the cache does not accept core requests because if they need to be put on-hold that could cause a deadlock. +Then, between the refill requests, the \ac{RTAB} or the core requests, the data cache applies a round-robin policy. + +\subsection{Possible improvements for the \acs*{RTAB} integration} + +\begin{itemize} + \item Avoid introducing a NOP after an entry is replayed (popped). + This is currently done to simplify the resolution of a concurrent \verb$alloc_and_link$ and \verb$pop_commit$ where the request being allocated depends on the one being popped. +\end{itemize} + + +\section{Write-buffer} + +This cache implements a write-through policy. +In this policy, the write accesses from requesters are systematically transferred to the memory, regardless of whether the write access hits or misses in the \ac{HPDcache}. + +To decouple the acknowledgement from the memory to the \ac{HPDcache}, and the acknowledgement from the \ac{HPDcache} to the requester, this \ac{HPDcache} implements a write-buffer. +The goal is to increase the performance: the requester does not wait the acknowledgement from the memory, which may suffer from a very high latency. +Additionally, to improve the bandwidth utilization of data channels in the \ac{NoC}, the write-buffer implements coalescing of write data. + +The write-buffer implements two different parts: directory and data. +The directory enables tracking of active writes. +The data buffers are used to coalesce writes from the requester. +Entries in the data buffers are usually wider ($\mathsf{CONF\_HPDCACHE\_WBUF\_WORDS}$) than the data interface of requesters. +This is to enable the coalescing of multiple writes onto contiguous addresses. 
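+As a rough illustration of the coalescing mechanism, the C-like sketch below (in the style of the listings in the previous section) merges a requester write into a wider write-buffer data entry using the byte-enable information; the structure and field names are illustrative only.
+
+\begin{lstlisting}[language=c]
+// Illustrative coalescing of a requester write into a (wider) write-buffer
+// data entry. 'offset' is the byte offset of the write within the entry.
+void wbuf_coalesce(wbuf_data_entry_t *entry, const uint8_t *wdata,
+                   uint64_t be, unsigned offset)
+{
+    for (unsigned i = 0; i < REQ_DATA_BYTES; i++) {
+        if ((be >> i) & 1) {
+            entry->data[offset + i] = wdata[i]; // the last write wins
+            entry->be[offset + i]   = 1;        // mark the byte as valid
+        }
+    }
+}
+\end{lstlisting}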
+ +A given entry in the directory of the write-buffer may be in four different states: + +\begin{center} +\begin{tabular}{lp{.8\linewidth}} +\textbf{FREE} & The entry is available.\\ +\textbf{OPEN} & The entry is currently used by a previous write access. The entry accepts new writes (coalescing).\\ +\textbf{PEND} & The entry is waiting to be sent to the memory. In this state, the entry continues to accept new writes (coalescing).\\ +\textbf{SENT} & The entry was forwarded to the memory, and is waiting for the acknowledgement.\\ +\end{tabular} +\end{center} + +\subsection{Memory Write Consistency Model} +\seclabel{wbuf_mcr} + +The \ac{HPDcache} complies with the \ac{RVWMO} memory consistency model. +Regarding writes, in this consistency model, there are two important properties: + +\begin{enumerate} + +\item The order in which write accesses on different addresses are forwarded to memory MAY differ from the order they arrived from the requester (program order); + +\item Writes onto the same address, MUST be visible in order. If there is a data written by a write A on address @x followed by an another write B on the same address, the data of A cannot be visible after the processing of B. + +\end{enumerate} + +The second property allows write coalescing if the hardware ensures that the last write persists. + +The write-buffer exploits the first property. +Multiple "in-flight" writes are supported due to the multiple directory and data entries. +These writes can be forwarded to the memory in an order different than the program order. + +To comply with the second property, the write-buffer does not accept a write when there is an address conflict with a \textbf{SENT} entry. +In that case, the write is put on-hold following the policy described in \secref{onhold}. +The system may choose to relax the constraint of putting a write on-hold in case of an address conflict with a \textbf{SENT} entry. +This can be relaxed when the \ac{NoC} guaranties in-order delivery. +The runtime configuration bit~$\mathsf{cfig\_wbuf.S}$~(see \tabref{cfig_wbuf}) shall be set to 0 to relax this dependency. + +\subsection{Functional Description} +\seclabel{wbuf_funcdesc} + +When an entry of the write-buffer directory is in the \textbf{OPEN} or \textbf{PEND} states, there is an allocated data buffer, and it contains data that has not yet been sent to the memory. +When an entry of the write-buffer directory is in the \textbf{SENT} state, the corresponding data was transferred to the memory, and the corresponding data buffer was freed (and can be reused for another write). +A given entry in the write-buffer directory goes from \textbf{FREE} to \textbf{OPEN} state when a new write is accepted, and cannot be coalesced with another \textbf{OPEN} or \textbf{PEND} entry (e.g. not in the same address range). + +A directory entry passes from \textbf{OPEN} to \textbf{PEND} after a given number of clock cycles. +This number of clock cycles depends on different runtime configurable values. +Each directory entry contains a life-time counter. +This counter starts at 0 when a new write is accepted (\textbf{FREE}->\textbf{OPEN}), and incremented each cycle while in \textbf{OPEN}. +When the counter reaches~$\mathsf{cfig\_wbuf.threshold}$~(see \tabref{cfig_wbuf}), the write-buffer directory entry goes to \textbf{PEND}. +Another runtime configurable bit, $\mathsf{cfig\_wbuf.R}$~(see \tabref{cfig_wbuf}), defines the behavior of an entry when a new write is coalesced into an \textbf{OPEN} entry. 
+If this last configuration bit is set, the life-time counter is reset to 0 when a new write is coalesced.
+Otherwise, the counter keeps its value.
+
+The life-time of a given write-buffer directory entry is longer than the life-time of a data entry.
+A given directory entry is freed (\textbf{SENT}->\textbf{FREE}) when the write acknowledgement is received from the memory.
+The number of cycles to get an acknowledgement from the memory may be significant and is system-dependent.
+Thus, to improve the utilization of data buffers, the number of entries in the directory is generally greater than the number of data buffers.
+However, there is a trade-off between area and performance because the area cost of data buffers is the most critical cost in the write-buffer.
+The synthesis-time parameters $\mathsf{CONF\_HPDCACHE\_WBUF\_DIR\_ENTRIES}$ and $\mathsf{CONF\_HPDCACHE\_WBUF\_DATA\_ENTRIES}$ define the number of entries in the write-buffer directory and write-buffer data, respectively.
+
+When the $\mathsf{cfig\_wbuf.I}$ bit~(see \tabref{cfig_wbuf}) is set, the write buffer does not perform any write coalescing. This means that an entry passes directly from \textbf{FREE} to \textbf{PEND} (bypassing the \textbf{OPEN} state). The $\mathsf{cfig\_wbuf.threshold}$ value is ignored by the write buffer. While an entry is in the \textbf{PEND} state, and $\mathsf{cfig\_wbuf.I}$ is set, that entry does not accept any new writes and only waits for its data to be sent.
+
+\Todo{Investigate an implementation of the data buffers in RAM.
+At most, only one entry is written and read per cycle in the write buffer.
+Thus an implementation with a 2-port RAM could be more efficient.
+Write masks (at least at byte granularity) in the RAM are necessary to allow write coalescing.}
+
+\subsubsection{Memory Fences}
+
+In multi-core systems, or more generally, in systems with multiple \acs{DMA}-capable devices, when synchronization is needed, it is necessary to implement memory fences in software.
+In the case of RISC-V, there are specific instructions for this (i.e. the FENCE instruction).
+
+Fence instructions shall be forwarded to the cache to ensure the ordering of writes. The fence forces the write-buffer to send all pending writes before accepting new ones.
+This cache implements two ways of signalling a fence: sending a specific \ac{CMO} from the core (described in \charef{cmo}), or asserting the $\mathsf{wbuf\_flush\_i}$ pin (for one cycle).
+
+
+\section{Cache-coherency}
+\seclabel{cache_coherency}
+
+The current version of the cache does not implement any hardware cache-coherency protocol.
+
+In multi-core systems integrating this cache, cache-coherency needs to be enforced by the software. To this end, this cache provides cache invalidation operations among the supported \acp{CMO}. These are described in \charef{cmo} and can be used to solve the cache-obsolescence problem.
+
+As the cache implements a write-through policy, there is no memory-obsolescence problem: all writes are forwarded to the memory.
+
+
+%\textit{Note: To solve the memory obsolescence problem using a write-back policy, we need to implement dirty bits in the cache directory.
+%However, to support a byte granularity, we would need to implement 64 bits per cacheline, which represents an overhead of almost 13\% in the RAM area of the cache (considering a 32 KB cache).}
+
+% \section{Write-back Data}
+
+% The directory implements a given number of dirty bits per cacheline.
+%Dirty bits indicate which data has been locally modified in the cache and therefore, the next levels of memory contain an obsolete state. +%In case of a cacheline eviction, only "dirty" data is actually write-back upstream. + +% In the simplest case, L1 caches implement a single dirty bit. +%This means that anytime a modified cacheline needs to be evicted, even if a single bit has been modified, the entire cacheline is written-back. +%However, this behavior may cause problems when two different cores write in the same cacheline but different bytes (this situation is known as false-sharing). +%In that case, if no special mechanism is implemented, when a core writes-back a cacheline, it will potentially overwrite the changes from another core. +%When a write-invalidate cache-coherency protocol is implemented, this situation is avoided because a single core can own a modified cacheline. +%However, we do not implement a hardware cache-coherency protocol. + +% In our case, to avoid the false-sharing problem we need a finer granularity for indicating the dirty data. +%One way is to have more than 1 dirty bit per cacheline. +%For example, we could implement 1-bit per 32-bits words. +%As we have 64 bytes cachelines, this would need 16 dirty bits per cacheline. + +%\section{Cache Pipeline} + +\clearpage +\chapter{\acfp{CSR}} +\chalabel{csr} +\minitoc +\newpage + +\section{Dedicated CSR address space} +\seclabel{csr_address_space} + +\begin{tcolorbox}[colback=red!10!white, + colframe=white!10!red, + title=\textbf{Important}, + center, valign=top, halign=left, + center title, + width=.950\linewidth] +This CSR address space is not yet implemented in version 1.0.0 of the RTL. +In this version, runtime configuration values are passed through external ports of the HPDcache. +Performance counters are not implemented either. +\end{tcolorbox} + +The \ac{HPDcache} defines a dedicated memory address space for configuring and checking the internal status. +This memory space is shared among all the requesters connected to the same \ac{HPDcache}. +However, this space is private to those requesters in a system-wide point of view. +This is, this dedicated \ac{CSR} address space is not visible to other requesters integrated in the system. + +The dedicated \ac{CSR} address space is aligned to 4 Kibytes and has this same size. +Current version of the \ac{HPDcache} uses a very small subset of this address space, but the aligning to 4 Kibytes, allows easier mapping in the virtual address space by the \ac{OS}. +The smallest virtual/physical page size defined in the \citetitle{RISCV_privileged_spec}\cite{RISCV_privileged_spec} is 4 Kibytes. +This is the reason of this choice. +\Figref{csr_address_space} displays the layout of the dedicated \ac{CSR} address space of the \ac{HPDcache}. + +The $\mathsf{CFIG\_BASE}$ address is specified through an input port of the \ac{HPDcache}. +The name of this input pin is \lstinline{cfig_base_i}. +It is a multi-bit signal. +The number of bits is $\mathsf{CONF\_HPDCACHE\_PA\_WIDTH}$. 
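+As an illustration of the intended layout (keeping in mind that, as noted above, this address space is not implemented in version 1.0.0 of the RTL), the C-like sketch below computes the address of a few registers from the $\mathsf{CFIG\_BASE}$ address.
+The macro and function names are illustrative; the offsets are those given in the figure and register tables that follow.
+
+\begin{lstlisting}[language=c]
+// Illustrative computation of CSR addresses from CFIG_BASE
+// (offsets taken from the address-space layout below).
+#define CFIG_SEGMENT_OFFSET       0x000  // configuration registers
+#define CFIG_HWPF_SEGMENT_OFFSET  0x200  // hardware prefetcher registers
+#define PERF_SEGMENT_OFFSET       0x400  // performance counters
+
+uint64_t cfig_wbuf_addr(uint64_t cfig_base)
+{
+    return cfig_base + CFIG_SEGMENT_OFFSET + 0x10;  // cfig_wbuf register
+}
+
+uint64_t perf_read_req_addr(uint64_t cfig_base)
+{
+    return cfig_base + PERF_SEGMENT_OFFSET + 0x08;  // perf_read_req counter
+}
+\end{lstlisting}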
+ +\begin{figure}[!htbp] +\begin{center} + \begin{tikzpicture}[scale=.7] + \footnotesize + \draw[line width=2] (0,0) rectangle (4,8); + + % top line for segment 0 + \node at (2, 1) {\sf CFIG}; + \node [anchor=west] at (4, 0) {\sf CFIG\_BASE + 0x0000}; + \node [anchor=west] at (4, 2) {\sf CFIG\_BASE + 0x0200}; + \draw [line width=2](0,2) + -- (4,2); + + % top line for segment 1 + \node at (2, 3) {\sf CFIG\_HWPF}; + \node [anchor=west] at (4, 4) {\sf CFIG\_BASE + 0x0400}; + \draw [line width=2](0,4) + -- (4,4); + + % top line for segment 2 + \node at (2, 5) {\sf PERF}; + \node [anchor=west] at (4, 6) {\sf CFIG\_BASE + 0x0600}; + \draw [line width=2](0,6) + -- (4,6); + + \node [anchor=west] at (4, 8) {\sf CFIG\_BASE + 0x800}; + \draw[fill=lightgray, draw=black, line width=2] (0,6) rectangle (4,8); + \node [anchor=west] at (14, 8) {\sf CFIG\_BASE + 0x1000}; + \draw[fill=lightgray, draw=black, line width=2] (10,6) rectangle (14,8); + \node [anchor=west] at (14, 6) {\sf CFIG\_BASE + 0xe00}; + \draw[fill=lightgray, draw=black, line width=2] (10,4) rectangle (14,6); + \node [anchor=west] at (14, 4) {\sf CFIG\_BASE + 0xc00}; + \draw[fill=lightgray, draw=black, line width=2] (10,2) rectangle (14,4); + \node [anchor=west] at (14, 2) {\sf CFIG\_BASE + 0xa00}; + \draw[fill=lightgray, draw=black, line width=2] (10,0) rectangle (14,2); + \node [anchor=west] at (14, 0) {\sf CFIG\_BASE + 0x800}; + \end{tikzpicture} + \caption{Dedicated CSR address space}% + \figlabel{csr_address_space} +\end{center} +\end{figure} + +\newpage +\section{Configuration registers} + +\Tabref{csr_config} lists the configuration registers implemented in the \ac{HPDcache}. + +These are mapped on the $\mathsf{CFIG}$ memory address segment in \figref{csr_address_space}. + +\begin{figure}[!htbp] + \begin{center} + \caption{Configuration registers in the \ac*{HPDcache}}% + \tablabel{csr_config} + {\small% + \begin{tabular}{p{.25\textwidth}p{.44\textwidth}p{.21\textwidth}} + \textbf{CFIG Segment} & &\\ + \toprule + \textbf{Register} + & \textbf{Description} + & \textbf{Base address} \\ + \toprule + $\mathsf{cfig\_info}$ + & 64-bits register with cache information + & $\mathtt{~+~0x00}$\\ + \midrule + $\mathsf{cfig\_ctrl}$ + & 64-bits register for configuring the cache controller + & $\mathtt{~+~0x08}$\\ + \midrule + $\mathsf{cfig\_wbuf}$ + & 64-bits register for configuring the write-buffer + & $\mathtt{~+~0x10}$\\ + & & \\ + \textbf{CFIG\_HWPF Segment} & &\\ + \toprule + \textbf{Register} + & \textbf{Description} + & \textbf{Base address} \\ + \toprule + $\mathsf{cfig\_hwpf\_status}$ + & 64-bits register with the status of the hardware prefetcher + & $\mathtt{~+~0x200}$\\ + \midrule + \verb$for (i = 0; i < 4; i++) {$ & & \\ + \midrule + $\mathsf{cfig\_hwpf\_base\_engine[i]}$ + & 64-bits base cline register of the engine $\mathtt{i}$ of the hardware prefetcher + & $\mathtt{~+~0x200}$ + $\mathtt{+~(i~+~1)\times{}0x20~+~0x0}$\\ + \midrule + $\mathsf{cfig\_hwpf\_param\_engine[i]}$ + & 64-bits parameters register of the engine $\mathtt{i}$ of the hardware prefetcher + & $\mathtt{~+~0x200}$ + $\mathtt{+~(i~+~1)\times{}0x20~+~0x8}$\\ + \midrule + $\mathsf{cfig\_hwpf\_throttle\_engine[i]}$ + & 64-bits throttle register of the engine $\mathtt{i}$ of the hardware prefetcher + & $\mathtt{~+~0x200}$ + $\mathtt{+~(i~+~1)\times{}0x20~+~0x10}$\\ + \midrule + \verb$}$ & & \\ + \bottomrule + \end{tabular}} + \end{center} +\end{figure} + +\begin{minipage}{\textwidth} +\paragraph{$\mathbf{cfig\_info}$ - $\mathtt{~+~0x00}$}\mbox{}\\[1em] + 
\begin{bytefield}[endianness=big,bitwidth=\linewidth/64,% + boxformatting={\centering\footnotesize\sf}]{64} + \bitheader{0,7,8,15,16,19,20,23,48,63} \\ + \bitbox{16}{ID} &% + \bitbox{24}{\color{lightgray}\rule{\width}{\height}} &% + \bitbox{4}{HwPf} &% + \bitbox{4}{LnSz} &% + \bitbox{8}{Ways} &% + \bitbox{8}{Sets}% + \end{bytefield} + + \begin{center} + {\footnotesize\begin{tabular}{p{.03\textwidth}p{.17\textwidth}p{.03\textwidth}p{.25\textwidth}p{.35\textwidth}} + \textbf{Field} & \textbf{Description} & \textbf{Mode} & \textbf{Reset value} + & \textbf{Comment}\\ + \toprule + Sets & Number of sets & RO + & $\mathsf{CONF\_HPDCACHE\_SETS}$ + & Indicates the number of sets implemented.\\ + \midrule + Ways & Number of ways & RO + & $\mathsf{CONF\_HPDCACHE\_WAYS}$ + & Indicates the number of ways implemented.\\ + \midrule + LnSz & Number of bytes per cacheline (power of 2) & RO + & $\mathsf{log_2(HPDCACHE\_CL\_WIDTH/8)}$ + & It contains the $\mathsf{log_2}$ of the size in bytes of cachelines.\\ + \midrule + HwPf & Number of engines in the hardware prefetcher & RO + & 4 + & Indicates the number of simultaneous streams supported by the hardware prefetcher \\ + \midrule + ID & Version ID & RO + & $\mathtt{0xCEA0}$ + & Version ID of the \ac{HPDcache}.\\ + \bottomrule + \end{tabular}} + \end{center} +\end{minipage}\\[1em] + +\begin{minipage}{\textwidth} +\paragraph{$\mathbf{cfig\_ctrl}$ - $\mathtt{~+~0x08}$}\mbox{}\\[1em] + \begin{bytefield}[endianness=big,bitwidth=\linewidth/64,% + boxformatting={\centering\footnotesize\sf}]{64} + \bitheader{0,56,57,63} \\ + \bitbox{6}{\color{lightgray}\rule{\width}{\height}} &% + \bitbox{1}{A} &% + \bitbox{1}{R} &% + \bitbox{55}{\color{lightgray}\rule{\width}{\height}} &% + \bitbox{1}{E}% + \end{bytefield} + + \begin{center} + {\small\begin{tabular}{p{.05\textwidth}p{.30\textwidth}p{.05\textwidth}p{.05\textwidth}p{.40\textwidth}} + \textbf{Field} & \textbf{Description} & \textbf{Mode} & \textbf{Reset value} + & \textbf{Comment}\\ + \toprule + E & Cache Enable & RW & \texttt{0b0} + & When set to 0, all memory accesses are considered non-cacheable.\\ + \midrule + R & Single-entry RTAB (fallback mode) & RW & \texttt{0b0} + & This is a fallback mode. 
When set to 1, the cache controller only uses one entry in the \ac{RTAB}.\\ + \midrule + A & Forbid AMO mode (\secref{amo_implementation}) & RW & \texttt{0b0} + & When set to 1, the cache controller responds with an error to AMO requests targeting cacheable addresses.\\ + \bottomrule + \end{tabular}} + \end{center} +\end{minipage}\\[1em] + +\begin{minipage}{\textwidth} +\paragraph{$\mathbf{cfig\_wbuf}$ - $\mathtt{~+~0x10}$}\mbox{}\\[1em] + \begin{bytefield}[endianness=big,bitwidth=\linewidth/64,% + boxformatting={\centering\footnotesize\sf}]{64} + \bitheader{0,1,8,15,63} \\ + \bitbox{48}{\color{lightgray}\rule{\width}{\height}} &% + \bitbox{8}{Threshold}% + \bitbox{6}{\color{lightgray}\rule{\width}{\height}} &% + \bitbox{1}{S}% + \bitbox{1}{R}% + \end{bytefield} + + \begin{center} + \tablabel{cfig_wbuf} + {\small\begin{tabular}{p{.08\textwidth}p{.28\textwidth}p{.05\textwidth}p{.05\textwidth}p{.40\textwidth}} + \textbf{Field} & \textbf{Description} & \textbf{Mode} & \textbf{Reset value} + & \textbf{Comment}\\ + \toprule + R & Reset time-counter on write & RW & \texttt{0b1} + & When set to 1, writes restart the time-counter in the corresponding write-buffer entry.\\ + \midrule + S & Sequential Write-After-Write & RW & \texttt{0b0} + & When set to 1, the write-buffer holds-back writes requests that matches the target address of an on-the-fly write.\\ + \midrule + Threshold & Number of keep-alive cycles of entries in the write-buffer & RW & \texttt{0x04} + & The maximum accepted value is $\mathsf{CONF\_HPDCACHE\_WBUF\_TIMECNT\_MAX}$. + When set to 0, a write immediatly closes the corresponding entry.\\ + \midrule + I & Inhibit write coalescing & RW & \texttt{0b0} + & When set to 1, the write-buffer does not perform write coalescing. It accepts one write per entry.\\ + \bottomrule + \end{tabular}} + \end{center} +\end{minipage}\\[1em] + +\begin{minipage}{\textwidth} +\paragraph{$\mathbf{cfig\_hwpf\_*}$}\mbox{}\\[1em] +These registers are related to the hardware prefetcher. +They are mapped on the $\mathsf{CFIG\_HWPF}$ memory address segment.\\[1em] + +Details on hardware prefetcher configuration registers are in \secref{prefetch_csrs}. +\end{minipage}\\[1em] + + +\section{Performance counters} +\seclabel{csr_performance} + +The \ac{HPDcache} provides a set of performance counters. +These counters provide important information that can be used by software developers, at \ac{OS} level or user application level, to, for example, debug performance issues. + +\Tabref{csr_performance} lists the performance counters provided by the \ac{HPDcache}. +These are mapped on the $\mathsf{PERF}$ memory address segment in \figref{csr_address_space}. 
+ +\begin{figure}[!htbp] + \begin{center} + \caption{Performance counters in the \ac*{HPDcache}}% + \tablabel{csr_performance} + {\footnotesize% + \begin{tabular}{lll} + \toprule + \textbf{Counter} + & \textbf{Description} + & \textbf{Base address} \\ + \midrule + $\mathsf{perf\_write\_req}$ + & 64-bits counter for processed write requests + & $\mathtt{^\alpha~+~0x00}$\\ + \midrule + $\mathsf{perf\_read\_req}$ + & 64-bits counter for processed read requests + & $\mathtt{^\alpha~+~0x08}$\\ + \midrule + $\mathsf{perf\_prefetch\_req}$ + & 64-bits counter for processed prefetch requests + & $\mathtt{^\alpha~+~0x10}$\\ + \midrule + $\mathsf{perf\_uncached\_req}$ + & 64-bits counter for processed uncached requests + & $\mathtt{^\alpha~+~0x18}$\\ + \midrule + $\mathsf{perf\_cmo\_req}$ + & 64-bits counter for processed \ac{CMO} requests + & $\mathtt{^\alpha~+~0x20}$\\ + \midrule + $\mathsf{perf\_accepted\_req}$ + & 64-bits counter for accepted requests + & $\mathtt{^\alpha~+~0x28}$\\ + \midrule + $\mathsf{perf\_cache\_write\_miss}$ + & 64-bits counter for write cache misses + & $\mathtt{^\alpha~+~0x30}$\\ + \midrule + $\mathsf{perf\_cache\_read\_miss}$ + & 64-bits counter for read cache misses + & $\mathtt{^\alpha~+~0x38}$\\ + \midrule + $\mathsf{perf\_on\_hold\_req}$ + & 64-bits counter for requests put on-hold + & $\mathtt{^\alpha~+~0x40}$\\ + \bottomrule + % empty + & $\mathtt{\alpha:~~=~~+~0x400}$ + & + \end{tabular}} + \end{center} +\end{figure} + + +\section{Event signals} +\seclabel{csr_events} + +In addition to the performance registers explained in \secref{csr_performance}, the \ac{HPDcache} provides a set of one-shot signals that indicate when a given event is detected. +As one-shot signals, they are set to 1 for one cycle each time the corresponding event is detected. +If the same event is detected N cycles in a row, the corresponding event signal will remain set to 1 for N cycles. +\Tabref{csr_events} lists these event signals. + +These event signals are output-only. +They can be either left unconnected, if they are not used, or connected with the remainder of the system. +The system can use those signals, for example, for counting those events externally or for triggering some specific actions. 
+
+\begin{figure}[!htbp]
+  \begin{center}
+    \caption{Event signals in the \ac*{HPDcache}}%
+    \tablabel{csr_events}
+    {\footnotesize%
+    \begin{tabular}{ll}
+      \toprule
+      \textbf{Signal}
+      & \textbf{Event description} \\
+      \midrule
+      $\mathsf{evt\_cache\_write\_miss\_o}$
+      & Cache miss on write operation\\
+      \midrule
+      $\mathsf{evt\_cache\_read\_miss\_o}$
+      & Cache miss on read operation\\
+      \midrule
+      $\mathsf{evt\_uncached\_req\_o}$
+      & The cache processed an uncached request\\
+      \midrule
+      $\mathsf{evt\_cmo\_req\_o}$
+      & The cache processed a \ac{CMO} request\\
+      \midrule
+      $\mathsf{evt\_write\_req\_o}$
+      & The cache processed a write request\\
+      \midrule
+      $\mathsf{evt\_read\_req\_o}$
+      & The cache processed a read request\\
+      \midrule
+      $\mathsf{evt\_prefetch\_req\_o}$
+      & The cache processed a prefetch request\\
+      \midrule
+      $\mathsf{evt\_on\_hold\_req\_o}$
+      & The cache put a request on hold\\
+      \midrule
+      $\mathsf{evt\_rtab\_rollback\_o}$
+      & A replayed request has been pushed back into the replay table\\
+      \midrule
+      $\mathsf{evt\_stall\_refill\_o}$
+      & A new request is stalled because of an ongoing refill operation\\
+      \midrule
+      $\mathsf{evt\_stall\_o}$
+      & A new request is stalled because of a hazard in the pipeline (it includes refill stalls)\\
+      \bottomrule
+    \end{tabular}}
+  \end{center}
+\end{figure}
+
+
+
+\chapter{Cache Management Operations (CMOs)}
+\chalabel{cmo}
+\minitoc
+\newpage
+
+The \ac{HPDcache} is able to perform the following \aclp{CMO}:
+
+\begin{itemize}
+  \item memory write fence;
+  \item invalidate a cacheline given a physical address;
+  \item invalidate one or more cachelines in a given set given the set and one or more ways;
+  \item invalidate all the cachelines;
+  \item prefetch the cacheline indicated by its physical address.
+\end{itemize}
+
+Any of the clients of the \ac{HPDcache} can trigger any of these operations at any time by using specific opcodes in their requests.
+
+\begin{table}[h!]
+\begin{center}
+\caption{CMO operation types}%
+\tablabel{dcache_req_cmo}
+{\footnotesize
+\begin{tabular}{lll}
+  \toprule
+  \textbf{Mnemonic}
+  & \textbf{Encoding}
+  & \textbf{Type} \\
+  \midrule
+  $\mathsf{HPDCACHE\_CMO\_FENCE}$
+  & 0b000
+  & Memory write fence.\\
+  \midrule
+  $\mathsf{HPDCACHE\_CMO\_INVAL\_NLINE}$
+  & 0b010
+  & Invalidate a given cacheline.\\
+  \midrule
+  $\mathsf{HPDCACHE\_CMO\_INVAL\_SET\_WAY}$
+  & 0b011
+  & Invalidate one or more ways in a given set of the cache.\\
+  \midrule
+  $\mathsf{HPDCACHE\_CMO\_INVAL\_ALL}$
+  & 0b100
+  & Invalidate the entire cache.\\
+  \midrule
+  $\mathsf{HPDCACHE\_CMO\_PREFETCH}$
+  & 0b101
+  & Prefetch a given cacheline.\\
+  \bottomrule
+\end{tabular}}
+\end{center}
+\end{table}
+
+The $\mathsf{HPDCACHE\_REQ\_OP}$ must be set to $\mathsf{HPDCACHE\_REQ\_CMO}$ (see \tabref{dcache_req_op}).
+The \ac{CMO} subtype (\tabref{dcache_req_cmo}) is transferred into the $\mathsf{HPDCACHE\_REQ\_SIZE}$ signal of the request.
+
+The following sections describe in detail each of the \ac{CMO} operations, and how the requests shall be encoded to trigger each of them.
+
+
+\newpage
+\section{Memory write fence}
+
+To make sure that the \ac{HPDcache} accepts new requests only when all previous writes have been sent to and acknowledged by the memory, a requester can issue a fence operation.
+ +To do this, the requester shall build the request as follows: + +{\centering\footnotesize\begin{tabular}{p{.38\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Value} \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_ADDR}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_OP}$ + & $\mathsf{HPDCACHE\_REQ\_CMO}$ \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_WDATA}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_BE}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_SIZE}$ + & $\mathsf{HPDCACHE\_CMO\_FENCE}$ \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_UNCACHEABLE}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_SID}$ + & Corresponding source ID of the requester\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_TID}$ + & Transaction identifier from the requester\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_NEED\_RSP}$ + & *\\ + \bottomrule + & * means don't care \\ +\end{tabular}} + +As for any regular request, the request shall follow the \textbf{VALID}/\textbf{READY} handshake protocol described in \secref{if_valid_ready}. + +This operation has the following effects: +\begin{itemize} +\item All open entries in the write buffer (write requests waiting to be sent to the memory) are immediately closed; +\item No new requests from any requester are acknowledged until all pending write requests in the cache have been acknowledged on the \ac{NoC} interface. +\end{itemize} + + +\newpage +\section{Invalidate a cacheline by its physical address} + +To invalidate a cacheline by its physical address, the requester shall build the request as follows: + +{\centering\footnotesize\begin{tabular}{p{.38\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Value} \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_ADDR}$ + & Physical address to invalidate in the cache.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_OP}$ + & $\mathsf{HPDCACHE\_REQ\_CMO}$ \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_WDATA}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_BE}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_SIZE}$ + & $\mathsf{HPDCACHE\_CMO\_INVAL\_NLINE}$ \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_UNCACHEABLE}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_SID}$ + & Corresponding source ID of the requester\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_TID}$ + & Transaction identifier from the requester\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_NEED\_RSP}$ + & *\\ + \bottomrule + & * means don't care \\ +\end{tabular}} + +As for any regular request, the request shall follow the \textbf{VALID}/\textbf{READY} handshake protocol described in \secref{if_valid_ready}. + +For the sake of design simplification, this operation works as a memory read fence. +That is, before handling the operation, the \ac{HPDcache} waits for all pending read misses to complete. +Future versions of the HPDcache could wait only for a pending read miss on the same address that is being invalidated. + +If the given physical address is not cached, the operation does nothing. +However it still works as a memory read fence. + +Regarding the latency of this operation, it depends on the time to serve all pending read misses. +Only one cycle is needed to invalidate the corresponding cacheline. 
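+
+To illustrate the two encodings above, the following sketch shows how a requester-side driver could fill the request fields for a write fence and for an invalidation by physical address.
+The \texttt{hpdcache\_req\_t} structure, the field widths and the numerical value of \texttt{HPDCACHE\_REQ\_CMO} are hypothetical C mirrors of the request interface signals; only the field assignments follow the tables above (don't-care fields are simply left at 0).
+
+\begin{lstlisting}[%
+frame=single, language=c,%
+basicstyle=\footnotesize]
+#include <stdint.h>
+
+/* Hypothetical C mirror of the request interface signals. */
+typedef struct {
+    uint64_t addr;        /* HPDCACHE_REQ_ADDR */
+    uint8_t  op;          /* HPDCACHE_REQ_OP */
+    uint64_t wdata;       /* HPDCACHE_REQ_WDATA */
+    uint8_t  be;          /* HPDCACHE_REQ_BE */
+    uint8_t  size;        /* HPDCACHE_REQ_SIZE (carries the CMO subtype) */
+    uint8_t  uncacheable; /* HPDCACHE_REQ_UNCACHEABLE */
+    uint8_t  sid;         /* HPDCACHE_REQ_SID */
+    uint8_t  tid;         /* HPDCACHE_REQ_TID */
+    uint8_t  need_rsp;    /* HPDCACHE_REQ_NEED_RSP */
+} hpdcache_req_t;
+
+#define HPDCACHE_REQ_CMO          0x7 /* assumption: actual opcode is in dcache_req_op */
+#define HPDCACHE_CMO_FENCE        0x0 /* 0b000 */
+#define HPDCACHE_CMO_INVAL_NLINE  0x2 /* 0b010 */
+
+/* Memory write fence: only OP, SIZE, SID and TID carry meaningful values. */
+hpdcache_req_t hpdcache_make_fence(uint8_t sid, uint8_t tid)
+{
+    hpdcache_req_t req = {0};
+    req.op   = HPDCACHE_REQ_CMO;
+    req.size = HPDCACHE_CMO_FENCE;  /* CMO subtype goes in HPDCACHE_REQ_SIZE */
+    req.sid  = sid;
+    req.tid  = tid;
+    return req;
+}
+
+/* Invalidate the cacheline containing the given physical address. */
+hpdcache_req_t hpdcache_make_inval_nline(uint64_t paddr, uint8_t sid, uint8_t tid)
+{
+    hpdcache_req_t req = {0};
+    req.addr = paddr;               /* physical address to invalidate */
+    req.op   = HPDCACHE_REQ_CMO;
+    req.size = HPDCACHE_CMO_INVAL_NLINE;
+    req.sid  = sid;
+    req.tid  = tid;
+    return req;
+}
+\end{lstlisting}
+
+As for any other request, such a structure would then be driven on the request interface following the \textbf{VALID}/\textbf{READY} handshake.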
+ + +\newpage +\section{Invalidate a group of cachelines by their a set and way} + +To invalidate a group of cachelines, the requester shall build the request as follows: + +{\centering\footnotesize\begin{tabular}{p{.38\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Value} \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_ADDR}$ + & Index of the set to invalidate.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_OP}$ + & $\mathsf{HPDCACHE\_REQ\_CMO}$ \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_WDATA}$ + & Bit-vector with target ways to invalidate. + The number of bits decoded depends on the number of ways implemented ($\mathsf{CONF\_HPDCACHE\_WAYS}$). + The least significant bit corresponds to way 0, the second to way 1, etc.\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_BE}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_SIZE}$ + & $\mathsf{HPDCACHE\_CMO\_INVAL\_SET\_WAY}$\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_UNCACHEABLE}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_SID}$ + & Corresponding source ID of the requester\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_TID}$ + & Transaction identifier from the requester\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_NEED\_RSP}$ + & *\\ + \bottomrule + & * means don't care \\ +\end{tabular}} + +As for any regular request, the request shall follow the \textbf{VALID}/\textbf{READY} handshake protocol described in \secref{if_valid_ready}. + +For the sake of design simplification, this operation works as a memory read fence. +That is, before handling the operation, the \ac{HPDcache} waits for all pending read misses to complete. +Future versions of the HPDcache could wait only for a pending read misses on the same set that is being invalidated. + +If the given set and ways contains no valid cachelines, the operation does nothing. +However it still works as a memory read fence. + +Regarding the latency of this operation, it depends on the time to serve all pending reads. +Only one cycle is needed to invalidate the given set and ways because the ways are invalidated simultaneously. + + +\newpage +\section{Invalidate the entire cache} + +With this operation, all the cachelines in the \ac{HPDcache} are invalidated. + +To perform a complete invalidation of the \ac{HPDcache}, the requester shall build the request as follows: + +{\centering\footnotesize\begin{tabular}{p{.38\linewidth}p{.55\linewidth}} + \toprule + \textbf{Signal} + & \textbf{Value} \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_ADDR}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_OP}$ + & $\mathsf{HPDCACHE\_REQ\_CMO}$ \\ + \midrule + $\mathsf{HPDCACHE\_REQ\_WDATA}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_BE}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_SIZE}$ + & $\mathsf{HPDCACHE\_CMO\_INVAL\_ALL}$\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_UNCACHEABLE}$ + & *\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_SID}$ + & Corresponding source ID of the requester\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_TID}$ + & Transaction identifier from the requester\\ + \midrule + $\mathsf{HPDCACHE\_REQ\_NEED\_RSP}$ + & *\\ + \bottomrule + & * means don't care \\ +\end{tabular}} + +As for any regular request, the request shall follow the \textbf{VALID}/\textbf{READY} handshake protocol described in \secref{if_valid_ready}. + +This operation works as a memory read fence. +This is, before handling the operation, the \ac{HPDcache} waits for all pending read misses to complete. + +Regarding the latency of this operation, it has two aggregated components: + +\begin{itemize} +\item The time to serve all pending reads. 
+
+\item One cycle per set implemented in the \ac{HPDcache} (all ways of a given set are invalidated simultaneously).
+\end{itemize}
+
+
+\newpage
+\section{Prefetch a cacheline given its physical address}
+
+With this operation, the cacheline corresponding to the indicated physical address is (pre-)fetched into the \ac{HPDcache}.
+
+To perform a prefetch, the requester shall build the request as follows:
+
+{\centering\footnotesize\begin{tabular}{p{.38\linewidth}p{.55\linewidth}}
+  \toprule
+  \textbf{Signal}
+  & \textbf{Value} \\
+  \midrule
+  $\mathsf{HPDCACHE\_REQ\_ADDR}$
+  & Physical address of the cacheline to prefetch.\\
+  \midrule
+  $\mathsf{HPDCACHE\_REQ\_OP}$
+  & $\mathsf{HPDCACHE\_REQ\_CMO}$ \\
+  \midrule
+  $\mathsf{HPDCACHE\_REQ\_WDATA}$
+  & *\\
+  \midrule
+  $\mathsf{HPDCACHE\_REQ\_BE}$
+  & *\\
+  \midrule
+  $\mathsf{HPDCACHE\_REQ\_SIZE}$
+  & $\mathsf{HPDCACHE\_CMO\_PREFETCH}$\\
+  \midrule
+  $\mathsf{HPDCACHE\_REQ\_UNCACHEABLE}$
+  & *\\
+  \midrule
+  $\mathsf{HPDCACHE\_REQ\_SID}$
+  & Corresponding source ID of the requester\\
+  \midrule
+  $\mathsf{HPDCACHE\_REQ\_TID}$
+  & Transaction identifier from the requester\\
+  \midrule
+  $\mathsf{HPDCACHE\_REQ\_NEED\_RSP}$
+  & Indicates if the requester needs an acknowledgement when the prefetch of the cacheline is completed.\\
+  \bottomrule
+  & * means don't care \\
+\end{tabular}}
+
+As for any regular request, the request shall follow the \textbf{VALID}/\textbf{READY} handshake protocol described in \secref{if_valid_ready}.
+
+If the requested cacheline is already in the cache at the moment the request is processed, this request has no effect.
+If the requested cacheline is not present in the cache, the cacheline is fetched from the memory and replicated into the cache.
+
+When the prefetch transaction is completed, if the $\mathsf{HPDCACHE\_REQ\_NEED\_RSP}$ signal was set to 1, an acknowledgement is sent to the corresponding requester.
+
+
+\chapter{Atomic Memory Operations (AMOs)}
+\chalabel{amo}
+\minitoc
+\newpage
+
+\section{Background}
+
+The \acp{AMO} are special load/store accesses that implement a read-modify-write semantic.
+A single instruction is able to read data from the memory, perform an arithmetical/logical operation on that data, and store the result.
+All this is performed as a single operation (no other operation can come in between the read-modify-write steps).
+
+These operations are meant for synchronization in multi-core environments.
+To enable this synchronization, \acp{AMO} need to be performed at the \acf{PoS}, the point where all accesses from the different cores converge.
+This is usually a shared cache memory (when multiple levels of cache are implemented) or the external RAM controllers.
+Thus, the \ac{HPDcache} needs to forward these operations to the \ac{PoS} through the \ac{NoC} interface.
+
+\section{Supported \acp*{AMO}}
+
+On the interface from requesters, the supported \acp{AMO} are the ones listed in \tabref{dcache_req_op}.
+The supported \acp{AMO} are the ones defined in the atomic (A) extension of the RISC-V ISA specification: \citetitle{RISCV_spec}\cite{RISCV_spec}.
+
+
+\section{Implementation}
+\seclabel{amo_implementation}
+
+This cache does not implement a hardware cache-coherency protocol.
+Therefore, the software needs to solve the cache obsolescence problem to ensure it reads the last value of the shared data.
+There are two common ways of doing this:
+\begin{enumerate}
+\item Statically, by placing all shared data into uncacheable segments (never replicated in the \ac{HPDcache});
+
+\item Dynamically, by explicitly invalidating local copies of shared data from the \ac{HPDcache}.
+\end{enumerate}
+
+The cache obsolescence problem applies to \acp{AMO}.
+As these operations are used for implementing synchronization mechanisms, the manipulated data is by nature shared and needs to be coherent between the different caches.
+The \ac{HPDcache} implements two different modes for handling \acp{AMO}:
+
+\begin{description}
+\item[Replicated AMO mode] \mbox{}\\%
+Forward the \ac{AMO} to the \ac{PoS}, and wait for the response with the old data.
+If the data of the target address is replicated in the \ac{HPDcache}, the \ac{HPDcache} computes the new value locally, and updates the target word in the corresponding cacheline.
+With this solution, the word modified by the \ac{AMO} will be up to date (coherent) with respect to the value in memory.
+This solution needs explicit treatment from the software.
+In particular, it requires that synchronization variables are always written (and possibly read) using \acp{AMO}.
+For reading a shared variable, the software has two possibilities: (1) send an \ac{AMO} that does not modify the memory (e.g. AMOOR with a bit-mask equal to zero); (2) invalidate the local cacheline prior to issuing the load instruction.
+
+\item[Forbid AMO mode] \mbox{}\\%
+This is a much stricter policy.
+\acp{AMO} can only be performed on uncacheable memory addresses.
+If requests do not follow this rule, an exception is signalled from the cache to the corresponding requester.
+
+\end{description}
+
+The \ac{HPDcache} supports both modes, but only one can be active at any given time.
+The \verb$cfig_error_on_cacheable_amo$ configuration bit selects the mode of operation.
+When this bit is set to 0, the \textbf{Replicated AMO mode} is active.
+When this bit is set to 1, the \textbf{Forbid AMO mode} is active.
+
+The \ac{HPDcache} handles \acp{AMO} as non-allocating operations, regardless of the \ac{AMO} mode described above.
+That is, \acp{AMO} never fetch a replica of the target cacheline from the memory to the cache.
+If the target cacheline IS NOT replicated in the cache, the \ac{AMO} modifies only the memory.
+If the target cacheline IS replicated in the cache, the \ac{AMO} modifies both the memory and the cache.
+
+\section{AMO ordering}
+
+As specified in the RISC-V ISA specification~\cite{RISCV_spec}, the base RISC-V ISA has a relaxed memory model.
+To provide additional ordering constraints, \acp{AMO} (including \acs*{LR}/\acs*{SC}) specify two bits, \textit{aq} and \textit{rl}, for \textit{acquire} and \textit{release} semantics.
+
+The \ac{HPDcache} always ignores the \textit{aq} and \textit{rl} bits.
+It considers that they are always set.
+Hence, the \ac{HPDcache} always handles \acp{AMO} as sequentially consistent memory operations.
+The \ac{HPDcache} waits for all pending read and write operations to complete before serving the \ac{AMO} request.
+
+This behavior implies that when the \ac{HPDcache} forwards an \ac{AMO} to the \ac{NoC}, it will be the only pending request from the \ac{HPDcache}.
+In addition, no new requests from the requesters are served until the \ac{AMO} is completed.
+
+
+\section{LR/SC support}
+
+\acf{LR} and \acf{SC} are part of the Atomic (A) extension of the RISC-V ISA specification~\cite{RISCV_spec}.
+These instructions allow {\it ``complex atomic operations on a single memory word or double-word''}.
+
+The \ac{HPDcache} fully supports all the instructions of the A extension of the RISC-V ISA, including \ac{LR} and \ac{SC} operations.
+
+In the specification of these instructions in the RISC-V ISA document, some details are dependent on the implementation, namely the size of the reservation set and the return code of an \ac{SC} failure.
+
+
+\subsection{LR/SC reservation set}
+
+When a requester executes an \ac{LR} operation, it ``reserves'' a set of bytes in memory.
+This set contains at least the bytes solicited in the request but may contain more.
+The RISC-V ISA defines two sizes for \ac{LR} operations: 4 bytes or 8 bytes.
+{\bf The \ac{HPDcache} reserves the 8 bytes (double-word) containing the addressed memory location regardless of whether the \ac{LR} size is 4 or 8 bytes}.
+The start address of the reservation set is an 8-byte aligned address.
+
+When the \ac{LR} size is 8 bytes, the address is also aligned to 8 bytes (\secref{if_addr_data_alignment}).
+In this case, the reservation set matches exactly the address interval defined in the request.
+When the \ac{LR} size is 4 bytes, there are two possibilities: (1)~the target address is not aligned to 8 bytes.
+In this case, the reservation set contains an additional 4 bytes before the target address;
+(2)~the target address is aligned to 8 bytes.
+The reservation set starts at the target address but contains an additional 4 bytes after the requested ones.
+
+In summary, in the case of an \ac{LR} operation, the reservation set address range is computed as follows:
+
+\begin{equation*}
+  \mathbf{reservation\_set =}
+  \begin{cases}
+    \mathsf{\lfloor{}HPDCACHE\_REQ\_ADDR / 8\rfloor{} \times 8} &
+      (start\_address) \\
+    \mathsf{(\lfloor{}HPDCACHE\_REQ\_ADDR / 8\rfloor{} \times 8) + 8} &
+      (end\_address) \\
+  \end{cases}
+\end{equation*}
+
+{\bf When a requester executes an \ac{SC} operation, the \ac{HPDcache} forwards the operation to the memory ONLY IF the bytes addressed by the \ac{SC} are part of an active reservation set}.
+If the \ac{SC} accesses a smaller number of bytes than those in the active reservation set, but within that reservation set, the \ac{SC} is still forwarded to the memory.
+
+The \ac{HPDcache} keeps a unique active reservation set.
+If multiple requesters perform \ac{LR} operations, the unique active reservation set is the one specified by the last \ac{LR} operation.
+
+\Todo{Could this be a problem ? Should we keep one reservation set per requester ?}
+
+After an \ac{SC} operation, the unique active reservation set, if any, is invalidated.
+This is regardless of whether the \ac{SC} operation succeeds or not.
+
+
+\subsection{\ac*{SC} failure response code}
+\seclabel{sc_failure_code}
+
+The RISC-V ISA~\cite{RISCV_spec} specifies that when an \ac{SC} operation succeeds, the core shall write zero into the destination register of the operation.
+Otherwise, in case of an \ac{SC} failure, the core shall write a non-zero value into the destination register.
+
+The \ac{HPDcache} returns the status of an \ac{SC} operation into the $\mathsf{HPDCACHE\_RSP\_RDATA}$~(\tabref{if_rsp}) signal of the response interface to requesters.
+The following table specifies the values returned by the \ac{HPDcache} into the $\mathsf{HPDCACHE\_RSP\_RDATA}$ signal in the case of an \ac{SC} operation.
+ +\begin{center} + {\small% + \begin{tabular}{p{.15\linewidth}p{.20\linewidth}} + \toprule + \textbf{Case} + & \textbf{Return value (status)}\\ + \midrule + SC success + & \texttt{0x0000\_0000} \\ + \midrule + SC failure + & \texttt{0x0000\_0001} \\ + \bottomrule + \end{tabular}} +\end{center} + +Depending on the specified size ($\mathsf{HPDCACHE\_REQ\_SIZE}$) in the request (\tabref{if_rsp}), the returned value is extended with zeros on the most significant bits. +This is, if the SC request size is 8 bytes, and the SC is a failure, then the returned value is \texttt{0x0000\_0000\_0000\_0001}. + +In addition, if the width of the $\mathsf{HPDCACHE\_RSP\_RDATA}$ signal is wider than the size of the SC request, the return value is replicated $\mathsf{CONF\_HPDCACHE\_REQ\_WORDS}$~(\tabref{dcache_parameters}) times. + + +\chapter{Hardware Memory Prefetcher} +\minitoc +\newpage + +In order to predict future data accesses and reduce the data cache miss rate, the cache implements a programmable hardware mechanism allowing to prefetch cachelines before they are actually requested. + +The \ac{HPDcache} implements a prefetcher that contains multiple prefetch engines. +Each prefetch engine works independently, and simultaneously. +A round-robin arbiter at the output of the prefetcher allows to select one prefetch request from one of the engines per cycle. +This arbiter guarantees the correct behavior when multiple prefetch engines are active. + +Each engine, if activated, fetches a stream of cachelines. +A stream is defined as a sequence of prefetch requests. +An engine reads one or multiple blocks of a given number of cachelines. +The first block starts at a given base cacheline. +Between blocks, one can configure a given address offset (also known as the stride). + +The four parameters (base cacheline, number of cachelines in a block, number of blocks, and the stride) of each stream (one per engine) are configured through dedicated \acp{CSR}. + +\section{Triggering} + +A given prefetcher engine starts operating when the following conditions are met: + +\begin{enumerate} +\item Each engine implements an enable bit in its dedicated \acp{CSR}. +This enable bit shall be set to 1 to allow the triggering of a given prefetcher engine. + +\item Each enabled engine (condition 1 is met), snoops on the requests ports from the requesters. +If there is a match between the issued address and the configured base cacheline of the engine, the engine starts the prefetching. + +\end{enumerate} + +Once an engine starts its operation, it does not snoop anymore the request ports. +At that moment, it issues a sequence of prefetch operations starting from $\mathsf{base\_cline}$, until the cacheline in the equation below. +When the last cacheline is reached, the behavior of the engine is described in the following section. + +\begin{equation*} + \mathsf{end\_cline = base\_cline + (Nblocks + 1)\times{}(Stride + 1)} +\end{equation*} + +A prefetch operation behaves as a read in the cache, but no data is expected in response by the prefetcher. +This means, that prefetch operations do not need to enable the data array of the cache (thus reducing the energy consumption for this operation). +Prefetch operations only access cache directory memories to check if the requested cacheline is cached or if it needs to be fetched from the memory. 
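+
+As a worked example of the stream covered by an engine, assume it is configured with $\mathsf{base\_cline = \mathtt{0x100}}$, $\mathsf{Nblocks = 1}$, $\mathsf{Nlines = 1}$ and $\mathsf{Stride = 3}$ (these fields are encoded one-based, as reflected by the $\mathsf{+\,1}$ terms in the equation above), which describes 2 blocks of 2 cachelines each, with block start addresses 4 cachelines apart.
+Once triggered, the engine covers cachelines $\mathtt{0x100}$--$\mathtt{0x101}$ and $\mathtt{0x104}$--$\mathtt{0x105}$ (the triggering access already fetched cacheline $\mathtt{0x100}$, so it is not requested again, see \algref{prefetch_issue}), and:
+
+\begin{equation*}
+  \mathsf{end\_cline = \mathtt{0x100} + (1 + 1)\times{}(3 + 1) = \mathtt{0x108}}
+\end{equation*}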
+
+\begin{tcolorbox}[colbacktitle=green!50!black,
+                  colback=green!10!white,
+                  colframe=green!50!black,
+                  title=\textbf{Programming note},
+                  center, valign=top, halign=justify,
+                  center title,
+                  width=.950\linewidth]
+
+As explained in this section, a requester needs to issue a load transaction within the base cacheline of an engine to start its operation.
+
+When the requester is a programmable processor core, an additional feature that could be implemented in the core is a software prefetch instruction.
+This instruction would allow the software to prefetch a given cacheline, without stalling the core while waiting for the response from the cache.
+Such an instruction could also be used to start an enabled prefetcher engine.
+
+In RISC-V cores, one possibility to implement this software prefetch instruction could be to use the following:
+
+\vspace{.8em}
+
+\begin{verbatim}
+lw x0, offset(rs1)
+\end{verbatim}
+
+\vspace{.8em}
+
+As the \verb|x0| register is always equal to zero, the loaded data is dropped.
+Therefore, an efficient implementation of this instruction in the core consists of forwarding the load to the L1 data cache without waiting for the response.
+\end{tcolorbox}
+
+\section{Activation/Deactivation Policies}
+\seclabel{prefetch_activation_policies}
+
+The prefetcher engines implement different automatic activation/deactivation policies:
+
+\begin{tabular}{p{.3\linewidth}p{.65\linewidth}}
+\toprule
+\textbf{Policies}
+& \\
+\midrule
+\textbf{Disarm when finished}
+& When the engine completes the configured stream, it is automatically disabled.\\
+\midrule
+
+\textbf{Rearm when finished}
+& When the prefetcher completes the configured stream, it is not disabled.
+However, it stops and waits to be triggered again.
+
+At that point, the base cacheline \ac{CSR} of the engine saves the last accessed cacheline plus the stride.
+That is, it saves the next address to prefetch.
+
+The \acp{CSR} for the number of blocks, cachelines per block and stride keep their originally configured values.\\
+\midrule
+
+\textbf{Rearm and Cycle when finished}
+& In this policy, the prefetch engine behaves as in the ``Rearm when finished'' policy, but the base cacheline \ac{CSR} is reset to the originally configured value.\\
+\bottomrule
+\end{tabular}
+
+\section{CSRs}
+\seclabel{prefetch_csrs}
+
+Each prefetcher engine has three dedicated \acp{CSR}.
+ +\begin{itemize} + \item Base cline (base cacheline) - $\mathsf{cfig\_hwpf\_base\_engine}$~(see \tabref{csr_config}) + + \begin{bytefield}[endianness=big,bitwidth=\linewidth/64,% + boxformatting={\centering\footnotesize\sf}]{64} + \bitheader{0,1,2,3,4,5,6,63} \\ + \bitbox{58}{Base~cline} &% + \bitbox{2}{\color{lightgray}\rule{\width}{\height}} &% + \bitbox{1}{U} &% + \bitbox{1}{C} &% + \bitbox{1}{R} &% + \bitbox{1}{E}% + \end{bytefield} + + \begin{tabular}{ll} + \textbf{E:} & Enable bit \\ + \textbf{R:} & Rearm bit \\ + \textbf{C:} & Cycle bit \\ + \textbf{U:} & Upstream bit \\ + \end{tabular} + + \begin{tabular}{lll} + \textbf{Mode} & \textbf{R} & \textbf{C} \\ + Disarm when finished & 0 & X \\ + Rearm when finished & 1 & 0 \\ + Cycle and rearm when finished & 1 & 1 \\ + \end{tabular} + + \item Parameters - $\mathsf{cfig\_hwpf\_param\_engine}$~(see \tabref{csr_config}) + + \begin{bytefield}[endianness=big,bitwidth=\linewidth/32,% + boxformatting={\centering\footnotesize\sf}]{32} + \bitheader[lsb=32]{32,47,48,63} \\ + \bitbox{16}{Nblocks} &% + \bitbox{16}{Nlines} &% + \\[.5em]% + \bitheader[lsb=0]{0,31} \\ + \bitbox{32}{Stride} &% + \end{bytefield} + + \item Throttle - $\mathsf{cfig\_hwpf\_throttle\_engine}$~(see \tabref{csr_config}) + + \begin{bytefield}[endianness=big,bitwidth=\linewidth/32,% + boxformatting={\centering\footnotesize\sf}]{32} + \bitheader[lsb=0]{0,15,16,31} \\ + \bitbox{16}{Ninflight} &% + \bitbox{16}{Nwait} &% + \end{bytefield} +\end{itemize} + +\begin{description} + +\item [\textsf{Stride} parameter] \mbox{}\\ +It is an unsigned, one-based ($\mathsf{value + 1}$), 32-bits wide value. +The stride is in number of cachelines. +This means that the stride is always a multiple of $\mathsf{(HPDCACHE\_CL\_WIDTH/8)}$ bytes. + +\item [\textsf{Nblocks} parameter] \mbox{}\\ +It is an unsigned, one-based ($\mathsf{value + 1}$), 16-bits wide value. +This value corresponds to the number of blocks to prefetch. +The 16-bit value allows the prefetcher to prefetch up to 65536 blocks (of at least one cacheline). +This parameter is clearly over-dimensioned with respect to the usual capacity of the \ac{HPDcache} (e.g. 512, 64-byte, cachelines with a 32KB capacity). + +\item [\textsf{Nlines} parameter] \mbox{}\\ +It is an unsigned, one-based, 16-bits wide value. +It indicates the number of cachelines within blocks. +As the number of bits is 16, the maximum number of cachelines in a given block is 65536. + +\item [\textsf{Nwait} parameter] \mbox{}\\ +It is an unsigned, one-based, 16-bits wide value. +It defines the number of cycles (plus 1) between two requests of the prefetcher engine. +The zero value indicates that the engine can issue a request every cycle. + +\item [\textsf{Ninflight} parameter] \mbox{}\\ +It is an unsigned, zero-based, 16-bits wide value. +It defines the maximum number of in-flight (sent but not yet acknowledged) transactions from the prefetcher engine. +This parameter allows to throttle the memory bandwidth solicited by the prefetcher engine. +The zero value indicates that the number of in-flight transactions is unlimited. + +\item [\textsf{U} bit] \mbox{}\\ +When this upstream bit is set, prefetch operations targets the next level in the memory hierarchy. +In this case, upstream prefetch operations do not allocate cachelines in the L1 data-cache. +These are forwarded to the next memory level that can then prefetch the requested address. +THIS BIT IS NOT CURRENTLY IMPLEMENTED AND IGNORED. 
+
+\item [\textsf{C} bit] \mbox{}\\
+This bit is only considered when the R bit (rearm) is also set.
+When this cycle bit is set, after the prefetcher engine completes the prefetch stream, it resets the base cacheline to the originally configured one (see \secref{prefetch_activation_policies}).
+
+\item [\textsf{R} bit] \mbox{}\\
+When this rearm bit is set, after the prefetcher engine completes the prefetch stream, it ``rearms'' itself (remains enabled), and snoops for core requests (see \secref{prefetch_activation_policies}).
+The address it snoops after it finishes depends on the Cycle (C) bit.
+If the C bit is set, the behavior is the one described above.
+If the C bit is unset, after the prefetcher engine finishes, the snoop address is set to $\mathsf{end\_cline}$.
+
+\end{description}
+
+There is also a global status register for the prefetcher to monitor the status of the different prefetcher engines:
+
+\begin{itemize}
+  \item Status register
+
+    \begin{bytefield}[endianness=big,bitwidth=\linewidth/16,%
+                      boxformatting={\centering\footnotesize\sf}]{16}
+      \bitheader[lsb=48]{48,63} \\
+      \bitbox{16}{\color{lightgray}\rule{\width}{\height}} &%
+      \\[.5em]%
+      \bitheader[lsb=32]{32,33,34,35,36,47} \\
+      \bitbox{12}{\color{lightgray}\rule{\width}{\height}} &%
+      \bitbox{1}{P3 busy} &%
+      \bitbox{1}{P2 busy} &%
+      \bitbox{1}{P1 busy} &%
+      \bitbox{1}{P0 busy}%
+      \\[.5em]%
+      \bitheader[lsb=16]{16,19,20,30,31} \\
+      \bitbox{1}{Free} &%
+      \bitbox{11}{\color{lightgray}\rule{\width}{\height}} &%
+      \bitbox{4}{Free Index} &%
+      \\[.5em]%
+      \bitheader[lsb=0]{0,1,2,3,4,15} \\
+      \bitbox{12}{\color{lightgray}\rule{\width}{\height}} &%
+      \bitbox{1}{P3 en} &%
+      \bitbox{1}{P2 en} &%
+      \bitbox{1}{P1 en} &%
+      \bitbox{1}{P0 en}%
+    \end{bytefield}
+\end{itemize}
+
+\begin{description}
+  \item [\textsf{P0-P3~enable bits}] \mbox{}\\
+  Indicate if the corresponding prefetcher is enabled.
+
+  \item [\textsf{Free~index bits}] \mbox{}\\
+  Indicate the index (decimal) of the first available prefetcher, from 0 to 3.
+  The software can use this information to easily compute the address offset of the configuration registers of the target prefetcher engine.
+
+  \item [\textsf{Free bit}] \mbox{}\\
+  It is set when the \emph{Free Index} is valid, that is, when there is effectively a free prefetcher.
+
+  \item [\textsf{P0-P3~busy bits}] \mbox{}\\
+  Indicate if the corresponding prefetcher is busy (it is enabled and active).
+
+\end{description}
+
+\section{Prefetch Request Algorithm}
+
+\Algref{prefetch_issue} shows how a prefetch engine calculates the addresses to prefetch and the operation of the throttling mechanisms.
+
+\begin{figure}[hptb]
+\begin{lstlisting}[%
+frame=single, language=c, numbers=left,%
+basicstyle=\footnotesize]
+const bit [63:0] LINES_PER_BLOCK = Nlines + 1;
+const bit [63:0] BLOCK_INCREMENT = Stride + 1;
+bit [63:0] block_nline;
+
+// Iterates over the blocks of cachelines
+block_nline = Base_cline;
+for (nb = 0; nb < (Nblocks + 1); nb++) {
+  // Iterates over the cachelines within each block.
+  for (nl = 0; nl < LINES_PER_BLOCK; nl++) {
+    // Skip the first cacheline of the first block as it is already requested by the
+    // request triggering the prefetcher
+    if ((nl == 0) && (nb == 0)) continue;
+
+    if (Ninflight > 0) {
+      // Wait while the number of in-flight prefetch requests is equal to the configured
+      // threshold (Ninflight). This is a throttling mechanism.
+      // The inflight counter is decremented by another process each time the prefetcher
+      // receives an acknowledgement for an inflight request.
+ while (inflight >= Ninflight) { + wait (1); // cycles + } + } + + // Send the prefetch operation for the calculated cacheline. + // Cachelines are contiguous within a block. + prefetch_address((block_nline + nl)*64); + + // The inflight counter is incremented each time the prefetcher sends a prefetch request. + inflight++; + + // Wait a given number of cycles between two prefetch requests. + // This is a throttling mechanism + wait (Nwait + 1); // cycles + } + + // The first cacheline of a block is offset (defined by the Stride) + // with respect to the previous block. + block_nline += BLOCK_INCREMENT; +} + +// If the cycle bit is not set, update the Base_cline with the +// address that would follow the last accessed one. +if (!cfig.base_cline.c) { + Base_cline = block_nline; +} +\end{lstlisting} +\caption{\alglabel{prefetch_issue}Request issuing algorithm of prefetch engines} +\end{figure} + +\section{Prefetch Abort} + +It is possible for the user to abort an active prefetch sequence from an engine. +To do that, the user can reset to 0 the $\mathsf{enable(E)}$ bit in the $\mathsf{base\_cline}$ \ac{CSR} register of the corresponding prefetcher engine. + +Such action, makes the corresponding target engine to stop its current sequence of prefetch requests. +If there were inflight not-yet-acknowledge requests from that engine, it will wait for the corresponding acknowledgements. +During this time, the prefetcher engine is not usable, and its corresponding $\mathsf{busy}$ bit in the $\mathsf{Status}$ \ac{CSR} is kept set to 1. +While the $\mathsf{busy}$ bit is set to 1, any write in \ac{CSR} registers of that prefetcher engine has no effect on the engine behavior. +However, the modified \ac{CSR} registers will keep the written values. + +When all acknowledgements are received, the corresponding prefetcher engine has its $\mathsf{busy}$ bit set to 0. +All other \ac{CSR} of the prefetcher engine keep their configured values. +At this point, the prefetcher engine is usable and can be reconfigured normally. + +% +% appendix +% +\appendix + +\chapter{Appendices} +\minitoc +\newpage + +\section{RAM macros} +\apxlabel{ram_macros} + +This cache uses memory arrays in multiple subcomponents. +When targeting \acs{ASIC}/\acs{FPGA} implementations integrating this cache, memory arrays shall be implemented using technology-specific \acs{SRAM} macros. +In the case of \acs{FPGA} implementations, this is less critical because synthesis tools for \acs{FPGA} usually select automatically embedded RAMs when possible. + +\Tabref{ram_macros} summarises the instances of RAM macros implemented in the \ac{HPDcache}. +This table has: +\begin{enumerate} +\item The path in the RTL model where that memory array is found; +\item a reference to the section that gives details about their dimensions, the number of instances and their content; +\item the number of read/write ports; +\item the read/write latency. +\end{enumerate} + +\begin{table}[h!] 
+\begin{center} +\caption{Summary of RAM macros in the \acs*{HPDcache}}% +\tablabel{ram_macros} +{\footnotesize\begin{tabular}{ll} +\toprule +\toprule +\multicolumn{2}{c}{\textbf{\acs{MSHR}}} \\ +\midrule +\textbf{\acs{RTL} Instance} +& \verb$.hpdcache_miss_handler_i.hpdcache_mshr_i.mshr_sram$ \\ +\midrule +\textbf{Details} +& \Secref{mshr_implementation} \\ +\midrule +\textbf{Latency} +& 1 clock cycle (RW) \\ +\midrule +\textbf{Ports} +& 1RW \\ + +\toprule +\toprule +\multicolumn{2}{c}{\textbf{Cache Directory}} \\ +\midrule +\textbf{\acs{RTL} Instance} +& \verb$.hpdcache_ctrl_i.hpdcache_memctrl_i.hpdcache_memarray_i.dir_sram[i]$ \\ +\midrule +\textbf{Details} +& \Secref{dir_data_ram_implementation} \\ +\midrule +\textbf{Latency} +& 1 clock cycle (RW) \\ +\midrule +\textbf{Ports} +& 1RW \\ + +\toprule +\toprule +\multicolumn{2}{c}{\textbf{Cache Data}} \\ +\midrule +\textbf{\acs{RTL} Instance} +& \verb$.hpdcache_ctrl_i.hpdcache_memctrl_i.hpdcache_memarray_i.data_sram[i]$ \\ +\midrule +\textbf{Details} +& \Secref{dir_data_ram_implementation} \\ +\midrule +\textbf{Latency} +& 1 clock cycle (RW) \\ +\midrule +\textbf{Ports} +& 1RW \\ +\end{tabular}} +\end{center} +\end{table} + + +\section{Implementations} +\apxlabel{implementations} + +\subsection{EPI Accelerator and RHEA Chip} + +In the context of the European Processor Initiative (EPI) project, in the accelerator stream~\cite{epac_website_2022}, this cache is used as the L1 data cache for the VRP~\cite{durand_vrp_2022} accelerator that is integrated in both, the EPI accelerator (EPAC1.5) test-chip and the RHEA chip. + +The parameters of the cache are the following on those implementations: + +\begin{tabular}{ll} + \textbf{Capacity} + & 32 KBytes \\ + \textbf{Sets} + & 128 \\ + \textbf{Ways} + & 4 \\ + \textbf{Line Size} + & 64 bytes \\ + \textbf{Physical Address Width} + & 49 bits \\ + \textbf{Write Policy} + & Write-Through \\ + \textbf{Maximum Access width (Requester-side)} + & 32 bytes per cycle \\ +\end{tabular} + +% +% bibliography +% +\printbibliography + +\end{document} diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec_changelog.tex b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec_changelog.tex new file mode 100755 index 00000000000..5b3242f0dff --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec_changelog.tex @@ -0,0 +1,35 @@ +%% +%% Copyright 2023 CEA* +%% *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) +%% +%% SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +%% +%% Licensed under the Solderpad Hardware License v 2.1 (the “License”); you +%% may not use this file except in compliance with the License, or, at your +%% option, the Apache License version 2.0. You may obtain a copy of the +%% License at +%% +%% https://solderpad.org/licenses/SHL-2.1/ +%% +%% Unless required by applicable law or agreed to in writing, any work +%% distributed under the License is distributed on an “AS IS” BASIS, WITHOUT +%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +%% License for the specific language governing permissions and limitations +%% under the License. +%% +%% Author(s): Cesar Fuguet +%% Date: February, 2023 +%% Description: Specification document of the HPDcache hardware IP +%% +\chapter*{Preface} + +The document contains the version~\docversion~of the HPDcache. 
+ +\section*{Preface to document version~1.0.0-draft} + +The changes in this version of the document include: +\begin{itemize} +\parskip 0pt +\itemsep 1pt +\item Initial version of the L1 data cache (HPDcache) specification. +\end{itemize} diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec_preamble.tex b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec_preamble.tex new file mode 100755 index 00000000000..fd495de6eae --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/hpdcache_spec_preamble.tex @@ -0,0 +1,351 @@ +%% +%% Copyright 2023 CEA* +%% *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) +%% +%% SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +%% +%% Licensed under the Solderpad Hardware License v 2.1 (the “License”); you +%% may not use this file except in compliance with the License, or, at your +%% option, the Apache License version 2.0. You may obtain a copy of the +%% License at +%% +%% https://solderpad.org/licenses/SHL-2.1/ +%% +%% Unless required by applicable law or agreed to in writing, any work +%% distributed under the License is distributed on an “AS IS” BASIS, WITHOUT +%% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +%% License for the specific language governing permissions and limitations +%% under the License. +%% +%% Author(s): Cesar Fuguet +%% Date: February, 2023 +%% Description: Specification document of the HPDcache hardware IP +%% + +%%% +%% Font packages +%%% +\usepackage[T1]{fontenc} +\usepackage[utf8]{inputenc} +\usepackage{fourier} + +\usepackage{amsmath} +\usepackage{amsthm} + +%\usepackage{lmodern} +\usepackage{xcolor} + +\definecolor{lightgray}{gray}{0.8} + +% use Helvetica Adobe sans serif fonts +\renewcommand{\sfdefault}{phv} + +%%% +%% Language packages +%%% +\usepackage{csquotes} +\usepackage[english]{babel} + +%%% +%% Page margins configuration +%%% +\usepackage{geometry} +\geometry{top=3cm, bottom=3cm} + +%%% +%% Figures' configuration packages and command +%%% +\usepackage{ifpdf} + +\ifpdf +\usepackage[pdftex]{graphicx} +\DeclareGraphicsExtensions{.jpg,.png,.pdf} +\else +\usepackage[dvips]{graphicx} +\DeclareGraphicsExtensions{{.eps}} +\fi + +\usepackage[font=footnotesize,position=top,skip=0pt]{caption} +\usepackage[font=footnotesize,position=top,skip=0pt]{subcaption} + +%%% +%% Tables' configuration packages +%%% +\usepackage{booktabs} +\usepackage{tabularx} +\usepackage{multirow} + +\newcolumntype{L}[1]{>{\hsize=#1\hsize\raggedright\arraybackslash}X}% +\newcolumntype{C}[1]{>{\hsize=#1\hsize\centering\arraybackslash}X}% + +%%% +%% Misc configuration +%%% +\usepackage{minitoc} +\usepackage{emptypage} % prevent headings in empty pages +\usepackage{xspace} +\usepackage{enumitem} +\usepackage[printonlyused]{acronym} +\usepackage{tikz} +\newcommand*\circled[1]{\tikz[baseline=(char.base)]{% + \node[shape=circle,draw=black,inner sep=1pt] (char){\textsf{\bfseries #1}};% + }% +} +\usetikzlibrary{shapes,arrows,chains} + +% Color boxing +\usepackage[many]{tcolorbox} + +%%% +%% Math environments +%%% +\theoremstyle{plain} +\newtheorem{property}{Property}[section] +\newtheorem{lemma}{Lemma}[property] + +\theoremstyle{definition} +\newtheorem{definition}{Definition}[section] + +\newcommand{\eqvar}[1]{$\mathit{#1}$} +\renewcommand\qedsymbol{$\blacksquare$} + +%%% +%% Table of Contents (TOC), and chapters and section titles format +%%% +\usepackage{titletoc} +\usepackage{titlesec} + +%%% +%% To Do notes +%%% 
+\usepackage[% + colorinlistoftodos, + prependcaption +]{todonotes} + +\ifdefined\isdraft + \newcommand{\TodoSide}[1]{\todo[color=red!20]{\textbf{To do}: #1}} + \newcommand{\NoteSide}[1]{\todo[color=green!20]{\textbf{Note}: #1}} + \newcommand{\Todo}[1]{\todo[inline, color=red!20]{\textbf{To do}: #1}} + \newcommand{\Note}[1]{\todo[inline, color=green!20]{\textbf{Note}: #1}} +\else + \newcommand{\TodoSide}[1]{} + \newcommand{\NoteSide}[1]{} + \newcommand{\Todo}[1]{} + \newcommand{\Note}[1]{} +\fi + + +% maximum depth of the table of contents (2: subsection) + +\setcounter{tocdepth}{2} + +% format for general table of contents +\titlecontents{chapter}% +[1.5em]% +{\addvspace{1em plus 0pt minus 0pt}\bfseries}% +{\contentslabel{1.3em}}% +{\hspace{-1.3em}}% +{\hfill\contentspage}% +[\addvspace{0pt}] + +\titlecontents{section}% +[3.8em]% +{\addvspace{.4em plus 0pt minus 0pt}\bfseries}% +{\contentslabel{2.3em}}% +{}% +{\titlerule*[0.75em]{\normalfont.}\contentspage} + +\titlecontents{subsection}% +[7.0em]% +{\addvspace{.2em plus 0pt minus 0pt}}% +{\contentslabel{3.2em}}% +{}% +{\titlerule*[.75em]{.}\contentspage} + +% format for partial table of contents (at each chapter) + +\titlecontents{psection}% +[2.3em]% +{\addvspace{.4em plus 0pt minus 0pt}\bfseries}% +{\contentslabel{2.3em}}% +{}% +{\titlerule*[.75em]{\normalfont.}\contentspage} + +\titlecontents{psubsection}% +[5.5em]% +{\addvspace{.2em plus 0pt minus 0pt}}% +{\contentslabel{3.2em}}% +{}% +{\titlerule*[.75em]{.}\contentspage} + +\titlecontents{figure}% +[3.8em]% +{\addvspace{.4em plus 0pt minus 0pt}\normalfont}% +{\contentslabel{2.3em}}% +{}% +{\titlerule*[0.75em]{\normalfont.}\contentspage} + +\titlecontents{table}% +[3.8em]% +{\addvspace{.4em plus 0pt minus 0pt}\normalfont}% +{\contentslabel{2.3em}}% +{}% +{\titlerule*[0.75em]{\normalfont.}\contentspage} + +% insert this command at each chapter's beginning to add a partial TOC + +\newcommand{\chaptertoc}{% + \vspace*{1.25ex}% + \vbox{\bfseries\Large Contents}% + \vspace*{1ex}\titlerule% + \normalfont\normalsize% + \startcontents[sections]% + \printcontents[sections]{p}{1}{}% + \vspace*{1ex}\titlerule\vspace*{1ex}% + \newpage} + +\titleformat{\chapter}[display]{\bfseries\huge}% +{\chaptertitlename~\thechapter}{.75ex}% +{\titlerule[2pt]\vspace*{.75ex}\filright}% +[]%[\vspace*{.75ex}\titlerule] +\titlespacing*{\chapter}{0pt}{30pt}{20pt}[0pt] + +\titleformat{\section}[hang]{\normalfont\Large\bfseries}{% + \thesection}{1em}{} +\titlespacing*{\section}{0pt}{% + 3.5ex plus 1ex minus .2ex}{2.3ex plus .2ex}[0pt] + +\titleformat{\subsection}[hang]{\normalfont\large\bfseries}{% + \thesubsection}{1em}{} +\titlespacing*{\subsection}{0pt}{% + 3.25ex plus 1ex minus .2ex}{1.5ex plus .2ex}[0pt] + +\titleformat{\subsubsection}[hang]{\normalfont\normalsize\bfseries}{% + \thesubsubsection}{1em}{} +\titlespacing*{\subsubsection}{0pt}{% + 3.25ex plus 1ex minus .2ex}{1.5ex plus .2ex}[0pt] + +\titleformat{\paragraph}[runin]{\normalfont\normalsize\bfseries}{% + \theparagraph}{1em}{}[] +\titlespacing*{\paragraph}{0pt}{% + 3.25ex plus 1ex minus .2ex}{1em} + +\titleformat{\subparagraph}[runin]{\normalfont\normalsize\bfseries}{% + \thesubparagraph}{1em}{}[] +\titlespacing*{\subparagraph}{\parindent}{% + 3.25ex plus 1ex minus .2ex}{1em} + +%%% +%% Header / footer configuration +%%% +\usepackage{fancyhdr} + +\fancypagestyle{fancystyle}{% + \fancyhf{}% clear header and footer fields + \fancyhead[RO,LE]{\footnotesize\sffamily\nouppercase{\rightmark}}% + 
\fancyhead[RE,LO]{\footnotesize\sffamily\mbox{}Version~\docversion}% + \fancyfoot[LO,RE]{\footnotesize\sffamily\mbox{}\docauthor}% + \fancyfoot[CE,CO]{\footnotesize\sffamily\mbox{}Copyright~\textcopyright~2023~Commissariat à l'Energie Atomique et aux Energies Alternatives (CEA)}% + \fancyfoot[RO,LE]{\footnotesize\sffamily\mbox{}\thepage}% + \renewcommand{\headrulewidth}{.6pt}% + \renewcommand{\footrulewidth}{.6pt}% +} + +\fancypagestyle{plain}{% + \fancyhf{}% clear header and footer fields + \fancyfoot[C]{\normalfont\sffamily\thepage}% + \renewcommand{\headrulewidth}{0pt}% + \renewcommand{\footrulewidth}{0pt}% +} + +\renewcommand{\sectionmark}{\markright} + +\addtolength{\headheight}{\baselineskip} + +%%% +%% Bibliography configuration +%%% +\usepackage[ + backend=biber, + bibencoding=utf8, + citestyle=ieee, + style=ieee +]{biblatex} + +%%% +%% Hyperref package (must be declared at last to avoid conflicts) +%%% +\usepackage[pdfusetitle]{hyperref} +\usepackage{url} +\hypersetup{% +colorlinks,% +linkcolor=blue% +} + +%% Plot drawing package +\usepackage{pgfplots} + +%% Programming code formatting +\usepackage{listings} + +%% Algorithm formatting +\usepackage{algorithm} +\usepackage{algorithmic} + +%% Formatting of network protocol specification +\usepackage{bytefield} + + +%%% +%% Definition of command aliases +%%% +%%% +%% Add vertical space between paragraphs and remove indentation +%%% +\usepackage{parskip} + +% keep the parskip for theorems (AMS packages). +\makeatletter +\def\thm@space@setup{% + \thm@preskip=\parskip \thm@postskip=0pt +} +\makeatother + +%%% +%% Label and reference commands +%%% +\newcommand{\figfont}[1]{\textsf{\bfseries #1}} + +\newcommand{\figlabel}[1]{\label{fig:#1}} +\newcommand{\Figref}[1]{\hyperref[fig:#1]{\mbox{Figure~\ref{fig:#1}}}} +\newcommand{\figref}[1]{\hyperref[fig:#1]{\mbox{figure~\ref{fig:#1}}}} +\newcommand{\fighypref}[2]{\hyperref[fig:#1]{#2}} +\newcommand{\chalabel}[1]{\label{cha:#1}} +\newcommand{\Charef}[1]{\hyperref[cha:#1]{\mbox{Chapter~\ref{cha:#1}}}} +\newcommand{\charef}[1]{\hyperref[cha:#1]{\mbox{chapter~\ref{cha:#1}}}} +\newcommand{\chafullref}[1]{Chapter~\ref{cha:#1}-\nameref{cha:#1}} +\newcommand{\chahypref}[2]{\hyperref[cha:#1]{#2}} +\newcommand{\seclabel}[1]{\label{sec:#1}} +\newcommand{\Secref}[1]{\hyperref[sec:#1]{\mbox{Section~\ref{sec:#1}}}} +\newcommand{\secref}[1]{\hyperref[sec:#1]{\mbox{section~\ref{sec:#1}}}} +\newcommand{\sechypref}[2]{\hyperref[sec:#1]{#2}} +\newcommand{\apxlabel}[1]{\label{apx:#1}} +\newcommand{\Apxref}[1]{\hyperref[apx:#1]{\mbox{Appendix~\ref{apx:#1}}}} +\newcommand{\apxref}[1]{\hyperref[apx:#1]{\mbox{appendix~\ref{apx:#1}}}} +\newcommand{\apxhypref}[2]{\hyperref[apx:#1]{#2}} +\newcommand{\tablabel}[1]{\label{tab:#1}} +\newcommand{\Tabref}[1]{\hyperref[tab:#1]{\mbox{Table~\ref{tab:#1}}}} +\newcommand{\tabref}[1]{\hyperref[tab:#1]{\mbox{table~\ref{tab:#1}}}} +\newcommand{\tabhypref}[2]{\hyperref[tab:#1]{#2}} +\newcommand{\alglabel}[1]{\label{alg:#1}} +\newcommand{\Algref}[1]{\hyperref[alg:#1]{\mbox{Algorithm~\ref{alg:#1}}}} +\newcommand{\algref}[1]{\hyperref[alg:#1]{\mbox{algorithm~\ref{alg:#1}}}} +\newcommand{\alghypref}[2]{\hyperref[alg:#1]{#2}} +\newcommand{\prplabel}[1]{\label{prp:#1}} +\newcommand{\prpref}[1]{\hyperref[prp:#1]{\mbox{Property~\ref{prp:#1}}}} +\newcommand{\lemlabel}[1]{\label{lem:#1}} +\newcommand{\lemref}[1]{\hyperref[lem:#1]{\mbox{Lemma~\ref{lem:#1}}}} +\newcommand{\deflabel}[1]{\label{def:#1}} +\newcommand{\defref}[1]{\hyperref[def:#1]{\mbox{Definition~\ref{def:#1}}}} + diff --git 
a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_back_to_back.svg b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_back_to_back.svg new file mode 100755 index 00000000000..fea7140c0ab --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_back_to_back.svg @@ -0,0 +1,4 @@ + + + +012345CLKPAYLOADdat0dat1VALIDREADY \ No newline at end of file diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_ready_before_valid.svg b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_ready_before_valid.svg new file mode 100755 index 00000000000..a7dd0fa115b --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_ready_before_valid.svg @@ -0,0 +1,4 @@ + + + +01234CLKPAYLOADdataVALIDREADY \ No newline at end of file diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_ready_when_valid.svg b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_ready_when_valid.svg new file mode 100755 index 00000000000..587f606ee53 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_ready_when_valid.svg @@ -0,0 +1,4 @@ + + + +01234CLKPAYLOADdataVALIDREADY \ No newline at end of file diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_valid_before_ready.svg b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_valid_before_ready.svg new file mode 100755 index 00000000000..ccc7b4f67db --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/exported/wave_valid_before_ready.svg @@ -0,0 +1,4 @@ + + + +01234CLKPAYLOADdataVALIDREADY \ No newline at end of file diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_core.svg b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_core.svg new file mode 100644 index 00000000000..76db8edfe47 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_core.svg @@ -0,0 +1,3436 @@ + + + + + HPDcache Core + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + HPDcache Core + February, 2023 + + + Cesar Fuguet + + + + + Commissaria a l'Energie Atomique et aux Energies Alternatives (CEA) + + + + + + + + English + + + + + + + + + + + + Miss Handler + + + + + + + + + +  Cache Directory and Data + + + + + + + + + + + + + + + + + + ways + + + sets + + v tag + + + + + = + + + + + + data to processor + + + + + + + + + tags + + @tag + hit + + + + + + + valids + @set + + way1 + way0 + + + + sets xwords/x_cuts + + {@set,@word[2]} + + + + + + + @word[1:0] + + + + + en[3] + en[2] + en[1] + en[0] + + 64 bits + + + + + @word[1:0] + + + + + + + + + way1 + way0 + + + way1 + way0 + + + way1 + way0 + + set0:word3 + set0:word2 + set0:word1 + set0:word0 + + + set0:word7 + set0:word6 + set0:word5 + set0:word4 + + + + set1:word3 + set1:word2 + set1:word1 + set1:word0 + 
+ [SVG figure text, markup not preserved: remainder of the preceding figure — data-array set/word/way selection detail and the HPDcache block diagram: Cache Controller, ARBITER, REFILL, MSHR, Write Buffer (WBUF), Replay Table (RTAB), Uncacheable & AMO Handler (UC), Cache Management Operation Handler (CMO), CONFIGURATION, HPDcache Core REQ/RSP, Protocol Engine Stages 0-2, and the READ MISS / READ UNCACHED / WRITE / WRITE-AMO UNCACHED request and response channels]
diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_data_ram_organization.svg b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_data_ram_organization.svg
new file mode 100755
index 00000000000..915dd28aa0a
--- /dev/null
+++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_data_ram_organization.svg
@@ -0,0 +1,2344 @@
+ [SVG figure text, markup not preserved: "HPDcache Data RAM Organization", February 2023, Cesar Fuguet, Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) — figure showing the ways/sets organization of the data RAM, the tag/set/word split of the 64-bit address from the processor, tag comparison and hit generation against the tag and valid arrays, the en[3:0] enable signals, and the way/word selection of the 64-bit data returned to the processor]
diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_request_address_data_alignment.svg b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_request_address_data_alignment.svg
new file mode 100755
index 00000000000..5d9119ecf1d
--- /dev/null
+++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_request_address_data_alignment.svg
@@ -0,0 +1,2016 @@
+ [SVG figure text, markup not preserved: "HPDcache Request Address Data Alignment", February 2023, Cesar Fuguet, Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) — figure showing how WDATA bytes and BE byte-enable bits are aligned for example accesses at addresses 0x8004, 0x8008 and 0x8009 with SIZE=3 (8 bytes), SIZE=2 (4 bytes) and SIZE=0 (1 byte)]
diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_request_arbiter.svg b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_request_arbiter.svg
new file mode 100644
index 00000000000..9fe73bbad77
--- /dev/null
+++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/hpdcache_request_arbiter.svg
@@ -0,0 +1,428 @@
+ [SVG figure text, markup not preserved: "HPDcache Request Arbiter", February 2023, Cesar Fuguet, Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) — figure showing requesters 0 to N-1 plus the hardware memory prefetcher feeding a fixed-priority arbiter (1 request/cycle) in front of the HPDcache core, its CSRs and the memory interface]
diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_back_to_back.json b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_back_to_back.json
new file mode 100755
index 00000000000..c4fecb2662e
--- /dev/null
+++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_back_to_back.json
@@ -0,0 +1,10 @@
+{signal: [
+  {name: 'CLK', wave: 'p....'},
+  {name: 'PAYLOAD', wave: 'xx22x', data: ['dat0', 'dat1']},
+  {name: 'VALID', wave: '0.1.0'},
+  {name: 'READY', wave: '01..0'}
+],
+  head:{
+    tick:0
+  }
+}
diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_ready_before_valid.json b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_ready_before_valid.json
new file mode 100755
index 00000000000..fe8a44623b7
--- /dev/null
+++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_ready_before_valid.json
@@ -0,0 +1,10 @@
+{signal: [
+  {name: 'CLK', wave: 'p...'},
+  {name: 'PAYLOAD', wave: 'x.2x', data: ['data']},
+  {name: 'VALID', wave: '0.10'},
+  {name: 'READY', wave: '01.0'}
+],
+  head:{
+    tick:0
+  }
+}
diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_ready_when_valid.json b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_ready_when_valid.json
new file mode 100755
index 00000000000..35075278afa
--- /dev/null
+++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_ready_when_valid.json
@@ -0,0 +1,10 @@
+{signal: [
+  {name: 'CLK', wave: 'p...'},
+  {name: 'PAYLOAD', wave: 'x.2x', data: ['data']},
+  {name: 'VALID', wave: '0.10'},
+  {name: 'READY', wave: '0.10'}
+],
+  head:{
+    tick:0
+  }
+}
diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_valid_before_ready.json b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_valid_before_ready.json
new file mode 100755
index 00000000000..16c313eb190
--- /dev/null
+++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/source/images/wave_valid_before_ready.json
@@ -0,0 +1,10 @@
+{signal: [
+  {name: 'CLK', wave: 'p...'},
+  {name: 'PAYLOAD', wave: 'x2.x', data: ['data']},
+  {name: 'VALID', wave: '01.0'},
+  {name: 'READY', wave: '0.10'}
+],
+  head:{
+    tick:0
+  }
+}
diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/supplement/download_wavedrom.sh b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/supplement/download_wavedrom.sh
new file mode 100644
index 00000000000..7dbf9c3c744
--- /dev/null
+++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/supplement/download_wavedrom.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+WAVEDROM_ARCHIVE=wavedrom-editor-v2.9.1-linux-x64
+WAVEDROM_URL=https://github.com/wavedrom/wavedrom.github.io/releases/download/v2.9.1/${WAVEDROM_ARCHIVE}.tar.gz
+
+WAVEDROM_CLI=package/wavedrom/wavedrom-cli.js
+WAVEDROM_CLI_URL=https://github.com/wavedrom/cli/releases/download/v0.3.1/wavedrom-cli.js
+
+echo -e 'Download the archive'
+if [[ ! -e ${WAVEDROM_ARCHIVE}.tar.gz ]]; then
+    wget -q ${WAVEDROM_URL}
+fi
+
+echo -e 'Decompress the archive'
+if [[ ! -e package/wavedrom/${WAVEDROM_ARCHIVE} ]]; then
+    mkdir -p package/wavedrom
+    tar xzf ${WAVEDROM_ARCHIVE}.tar.gz -C package/wavedrom
+fi
+
+echo -e 'Download wavedrom command-line client'
+if [[ !
-e ${WAVEDROM_CLI} ]]; then + wget -q -O ${WAVEDROM_CLI} ${WAVEDROM_CLI_URL} +fi diff --git a/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/version b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/version new file mode 100644 index 00000000000..2f79e10c82a --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/docs/hpdcache_spec_document/version @@ -0,0 +1 @@ +1.1.0-draft diff --git a/vendor/openhwgroup/cvhpdcache/rtl/hpdcache.Flist b/vendor/openhwgroup/cvhpdcache/rtl/hpdcache.Flist new file mode 100644 index 00000000000..fa7a1aa03a7 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/hpdcache.Flist @@ -0,0 +1,64 @@ +// +// Copyright 2023 CEA* +// *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you +// may not use this file except in compliance with the License, or, at your +// option, the Apache License version 2.0. You may obtain a copy of the +// License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an “AS IS” BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// +// +// Authors : Cesar Fuguet +// Creation Date : January, 2023 +// Description : File list for the HPDcache +// History : +// ++incdir+${HPDCACHE_DIR}/rtl/include +${HPDCACHE_DIR}/rtl/src/hpdcache_pkg.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_demux.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_fifo_reg.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_fxarb.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_rrarb.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_mux.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_prio_1hot_encoder.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_sram.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_sram_wbyteenable.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_sram_wmask.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_regbank_wmask_1rw.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_data_downsize.sv +${HPDCACHE_DIR}/rtl/src/common/hpdcache_data_upsize.sv +${HPDCACHE_DIR}/rtl/src/hwpf_stride/hwpf_stride_pkg.sv +${HPDCACHE_DIR}/rtl/src/hwpf_stride/hwpf_stride.sv +${HPDCACHE_DIR}/rtl/src/hwpf_stride/hwpf_stride_arb.sv +${HPDCACHE_DIR}/rtl/src/hwpf_stride/hwpf_stride_snooper.sv +${HPDCACHE_DIR}/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv +${HPDCACHE_DIR}/rtl/src/hpdcache.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_amo.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_cmo.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_core_arbiter.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_ctrl.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_ctrl_pe.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_memarray.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_memctrl.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_miss_handler.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_mshr.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_mshr_to_cache_set.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_plru.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_rtab.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_uncached.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_wbuf.sv +${HPDCACHE_DIR}/rtl/src/hpdcache_wbuf_wrapper.sv +${HPDCACHE_DIR}/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv +${HPDCACHE_DIR}/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv +${HPDCACHE_DIR}/rtl/src/utils/hpdcache_mem_resp_demux.sv diff 
--git a/vendor/openhwgroup/cvhpdcache/rtl/hpdcache_cva6.Flist b/vendor/openhwgroup/cvhpdcache/rtl/hpdcache_cva6.Flist new file mode 100644 index 00000000000..e0a138eb441 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/hpdcache_cva6.Flist @@ -0,0 +1,35 @@ +// +// Copyright 2023 CEA* +// *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you +// may not use this file except in compliance with the License, or, at your +// option, the Apache License version 2.0. You may obtain a copy of the +// License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an “AS IS” BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// +// +// Authors : Cesar Fuguet +// Creation Date : January, 2023 +// Description : File list for the HPDcache and adapters for the CVA6 core +// History : +// +${HPDCACHE_DIR}/rtl/src/target/cva6/cva6_hpdcache_params_pkg.sv +-F ${HPDCACHE_DIR}/rtl/hpdcache.Flist +${HPDCACHE_DIR}/rtl/src/utils/hpdcache_mem_to_axi_read.sv +${HPDCACHE_DIR}/rtl/src/utils/hpdcache_mem_to_axi_write.sv +${HPDCACHE_DIR}/rtl/src/target/cva6/cva6_hpdcache_subsystem.sv +${HPDCACHE_DIR}/rtl/src/target/cva6/cva6_hpdcache_subsystem_axi_arbiter.sv +${HPDCACHE_DIR}/rtl/src/target/cva6/cva6_hpdcache_if_adapter.sv + +//+define+HPDCACHE_ENABLE_CMO +//${HPDCACHE_DIR}/rtl/src/target/cva6/cva6_hpdcache_cmo_if_adapter.sv diff --git a/vendor/openhwgroup/cvhpdcache/rtl/include/hpdcache_typedef.svh b/vendor/openhwgroup/cvhpdcache/rtl/include/hpdcache_typedef.svh new file mode 100644 index 00000000000..5e92a791a81 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/include/hpdcache_typedef.svh @@ -0,0 +1,62 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : February, 2023 + * Description : HPDcache Types' Definition + * History : + */ +`ifndef __HPDCACHE_TYPEDEF_SVH__ +`define __HPDCACHE_TYPEDEF_SVH__ + +`define HPDCACHE_TYPEDEF_MEM_REQ_T(__name__, addr_t, id_t) \ + typedef struct packed { \ + addr_t mem_req_addr; \ + hpdcache_pkg::hpdcache_mem_len_t mem_req_len; \ + hpdcache_pkg::hpdcache_mem_size_t mem_req_size; \ + id_t mem_req_id; \ + hpdcache_pkg::hpdcache_mem_command_e mem_req_command; \ + hpdcache_pkg::hpdcache_mem_atomic_e mem_req_atomic; \ + logic mem_req_cacheable; \ + } __name__ + +`define HPDCACHE_TYPEDEF_MEM_RESP_R_T(__name__, id_t, data_t) \ + typedef struct packed { \ + hpdcache_pkg::hpdcache_mem_error_e mem_resp_r_error; \ + id_t mem_resp_r_id; \ + data_t mem_resp_r_data; \ + logic mem_resp_r_last; \ + } __name__ + +`define HPDCACHE_TYPEDEF_MEM_REQ_W_T(__name__, data_t, be_t) \ + typedef struct packed { \ + data_t mem_req_w_data; \ + be_t mem_req_w_be; \ + logic mem_req_w_last; \ + } __name__ + +`define HPDCACHE_TYPEDEF_MEM_RESP_W_T(__name__, id_t) \ + typedef struct packed { \ + logic mem_resp_w_is_atomic; \ + hpdcache_pkg::hpdcache_mem_error_e mem_resp_w_error; \ + id_t mem_resp_w_id; \ + } __name__ + +`endif diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_data_downsize.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_data_downsize.sv new file mode 100644 index 00000000000..33816790e93 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_data_downsize.sv @@ -0,0 +1,187 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : November 22, 2022 + * Description : Refill data downsize + * History : + */ +module hpdcache_data_downsize +// {{{ +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter int WR_WIDTH = 0, + parameter int RD_WIDTH = 0, + parameter int DEPTH = 0, + + localparam type wdata_t = logic [WR_WIDTH-1:0], + localparam type rdata_t = logic [RD_WIDTH-1:0] +) +// }}} +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + input logic w_i, + output logic wok_o, + input wdata_t wdata_i, + + input logic r_i, + output logic rok_o, + output rdata_t rdata_o +); +// }}} +// Architecture +// {{{ + // Local definitions + // {{{ + localparam int RD_WORDS = WR_WIDTH/RD_WIDTH; + localparam int PTR_WIDTH = $clog2(DEPTH); + localparam int WORDCNT_WIDTH = $clog2(RD_WORDS); + typedef logic [PTR_WIDTH-1:0] bufptr_t; + typedef logic [WORDCNT_WIDTH-1:0] wordptr_t; + typedef logic [PTR_WIDTH:0] occupancy_t; + // }}} + + // Internal registers and signals + // {{{ + rdata_t [DEPTH-1:0][RD_WORDS-1:0] buf_q; + bufptr_t wrptr_q, wrptr_d; + bufptr_t rdptr_q, rdptr_d; + occupancy_t used_q, used_d; + logic used_inc, used_dec; + wordptr_t [DEPTH-1:0] words_q, words_d; + logic words_set, words_dec; + logic full, empty; + // }}} + + // Control-Path + // {{{ + assign full = (hpdcache_uint'(used_q) == DEPTH), + empty = (used_q == 0), + wok_o = ~full, + rok_o = ~empty; + + always_comb + begin : write_comb + wrptr_d = wrptr_q; + used_inc = 1'b0; + words_set = 1'b0; + if (w_i && wok_o) begin + used_inc = 1'b1; + words_set = 1'b1; + if (hpdcache_uint'(wrptr_q) == (DEPTH-1)) begin + wrptr_d = 0; + end else begin + wrptr_d = wrptr_q + 1; + end + end + end + + always_comb + begin : read_comb + rdptr_d = rdptr_q; + words_dec = 1'b0; + used_dec = 1'b0; + if (r_i && rok_o) begin + words_dec = (words_q[rdptr_q] > 0); + if (words_q[rdptr_q] == 0) begin + used_dec = 1'b1; + if (hpdcache_uint'(rdptr_q) == (DEPTH-1)) begin + rdptr_d = 0; + end else begin + rdptr_d = rdptr_q + 1; + end + end + end + end + + always_comb + begin : used_comb + case ({used_inc, used_dec}) + 2'b10 : used_d = used_q + 1; + 2'b01 : used_d = used_q - 1; + default: used_d = used_q; + endcase + end + + always_comb + begin : words_comb + words_d = words_q; + if (words_set) begin + words_d[wrptr_q] = wordptr_t'(RD_WORDS - 1); + end + if (words_dec) begin + words_d[rdptr_q] = words_q[rdptr_q] - 1; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : ctrl_ff + if (!rst_ni) begin + rdptr_q <= 0; + wrptr_q <= 0; + used_q <= 0; + words_q <= 0; + end else begin + rdptr_q <= rdptr_d; + wrptr_q <= wrptr_d; + used_q <= used_d; + words_q <= words_d; + end + end + // }}} + + // Data-Path + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin : buf_ff + if (!rst_ni) begin + buf_q <= '0; + end else begin + if (words_set) begin + buf_q[wrptr_q] <= wdata_i; + end + end + end + + assign rdata_o = buf_q[rdptr_q][RD_WORDS - hpdcache_uint'(words_q[rdptr_q]) - 1]; + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial + begin : initial_assertions + assert (DEPTH > 0) else $error("DEPTH must be greater than 0"); + assert (WR_WIDTH > 0) else $error("WR_WIDTH must be greater than 0"); + assert (RD_WIDTH > 0) else $error("RD_WIDTH must be greater than 0"); + assert (RD_WIDTH < WR_WIDTH) else $error("RD_WIDTH must be less to WR_WIDTH"); + assert ((WR_WIDTH % RD_WIDTH) == 0) else $error("WR_WIDTH must be a multiple RD_WIDTH"); + end + // pragma translate_on + // }}} +// }}} +endmodule 
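+// Illustrative instantiation (not part of the upstream HPDcache sources): a
+// minimal sketch showing how hpdcache_data_downsize could be used to narrow a
+// 128-bit refill/memory data path down to 64-bit words. The wrapper name,
+// parameter values and port widths below are assumptions made only for this
+// example.
+module hpdcache_data_downsize_example
+(
+    input  logic         clk_i,
+    input  logic         rst_ni,
+    //  write (wide) side: one 128-bit beat per accepted write
+    input  logic         w_i,
+    output logic         wok_o,
+    input  logic [127:0] wdata_i,
+    //  read (narrow) side: two 64-bit words are read out per wide beat
+    input  logic         r_i,
+    output logic         rok_o,
+    output logic [63:0]  rdata_o
+);
+    //  DEPTH=2 buffers up to two wide beats before back-pressuring the writer
+    hpdcache_data_downsize #(
+        .WR_WIDTH (128),
+        .RD_WIDTH (64),
+        .DEPTH    (2)
+    ) u_downsize (
+        .clk_i,
+        .rst_ni,
+        .w_i,
+        .wok_o,
+        .wdata_i,
+        .r_i,
+        .rok_o,
+        .rdata_o
+    );
+endmodule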
+// }}} diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_data_upsize.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_data_upsize.sv new file mode 100644 index 00000000000..1017dee3b70 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_data_upsize.sv @@ -0,0 +1,189 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : November 22, 2022 + * Description : Refill data upsize + * History : + */ +module hpdcache_data_upsize +// {{{ +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter int WR_WIDTH = 0, + parameter int RD_WIDTH = 0, + parameter int DEPTH = 0, + + localparam type wdata_t = logic [WR_WIDTH-1:0], + localparam type rdata_t = logic [RD_WIDTH-1:0] +) +// }}} +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + input logic w_i, + input logic wlast_i, + output logic wok_o, + input wdata_t wdata_i, + + input logic r_i, + output logic rok_o, + output rdata_t rdata_o +); +// }}} +// Architecture +// {{{ + // Local definitions + // {{{ + localparam int WR_WORDS = RD_WIDTH/WR_WIDTH; + localparam int PTR_WIDTH = $clog2(DEPTH); + localparam int WORDCNT_WIDTH = $clog2(WR_WORDS); + typedef logic [PTR_WIDTH-1:0] bufptr_t; + typedef logic [WORDCNT_WIDTH-1:0] wordptr_t; + typedef logic [PTR_WIDTH:0] occupancy_t; + // }}} + + // Internal registers and signals + // {{{ + wdata_t [DEPTH-1:0][WR_WORDS-1:0] buf_q; + bufptr_t wrptr_q, wrptr_d; + bufptr_t rdptr_q, rdptr_d; + occupancy_t used_q, used_d; + logic used_inc, used_dec; + wordptr_t [DEPTH-1:0] words_q, words_d; + logic words_inc, words_reset; + logic full, empty; + logic shift; + // }}} + + // Control-Path + // {{{ + assign full = (hpdcache_uint'(used_q) == DEPTH), + empty = (used_q == 0), + wok_o = ~full, + rok_o = ~empty; + + always_comb + begin : write_comb + wrptr_d = wrptr_q; + used_inc = 1'b0; + words_inc = 1'b0; + shift = 1'b0; + if (w_i && wok_o) begin + shift = 1'b1; + words_inc = (hpdcache_uint'(words_q[wrptr_q]) < (WR_WORDS-1)); + if (hpdcache_uint'(words_q[wrptr_q]) == (WR_WORDS-1) || wlast_i) begin + used_inc = 1'b1; + if (hpdcache_uint'(wrptr_q) == (DEPTH-1)) begin + wrptr_d = 0; + end else begin + wrptr_d = wrptr_q + 1; + end + end + end + end + + always_comb + begin : read_comb + rdptr_d = rdptr_q; + used_dec = 1'b0; + words_reset = 1'b0; + if (r_i && rok_o) begin + used_dec = 1'b1; + words_reset = 1'b1; + if (hpdcache_uint'(rdptr_q) == (DEPTH-1)) begin + rdptr_d = 0; + end else begin + rdptr_d = rdptr_q + 1; + end + end + end + + always_comb + begin : used_comb + case ({used_inc, used_dec}) + 2'b10 : used_d = used_q + 1; + 2'b01 : used_d = used_q - 1; + default: used_d = used_q; + endcase + end + + always_comb + begin : words_comb + words_d = words_q; + if (words_inc) begin + 
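+            // one more narrow write-side word has been captured into the wide entry being assembled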
words_d[wrptr_q] = words_q[wrptr_q] + 1; + end + if (words_reset) begin + words_d[rdptr_q] = 0; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : ctrl_ff + if (!rst_ni) begin + rdptr_q <= 0; + wrptr_q <= 0; + used_q <= 0; + words_q <= '0; + end else begin + rdptr_q <= rdptr_d; + wrptr_q <= wrptr_d; + used_q <= used_d; + words_q <= words_d; + end + end + // }}} + + // Data-Path + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin : buf_ff + if (!rst_ni) begin + buf_q <= '0; + end else begin + if (shift) buf_q[wrptr_q][words_q[wrptr_q]] <= wdata_i; + end + end + + assign rdata_o = buf_q[rdptr_q]; + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial + begin : initial_assertions + assert (DEPTH > 0) else $error("DEPTH must be greater than 0"); + assert (WR_WIDTH > 0) else $error("WR_WIDTH must be greater than 0"); + assert (RD_WIDTH > 0) else $error("RD_WIDTH must be greater than 0"); + assert (WR_WIDTH < RD_WIDTH) else $error("WR_WIDTH must be less to RD_WIDTH"); + assert ((RD_WIDTH % WR_WIDTH) == 0) else $error("RD_WIDTH must be a multiple WR_WIDTH"); + end + // pragma translate_on + // }}} +// }}} +endmodule +// }}} diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_demux.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_demux.sv new file mode 100644 index 00000000000..3be21e0814e --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_demux.sv @@ -0,0 +1,69 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Simple multiplexor + * History : + */ +module hpdcache_demux +// Parameters +// {{{ +#( + // Number of outputs + parameter int unsigned NOUTPUT = 0, + + // Width in bits of each input + parameter int unsigned DATA_WIDTH = 0, + + // Selector signal is one-hot encoded + parameter bit ONE_HOT_SEL = 0, + + // Compute the width of the selection signal + localparam int unsigned NOUTPUT_LOG2 = $clog2(NOUTPUT), + localparam int unsigned SEL_WIDTH = ONE_HOT_SEL ? NOUTPUT : NOUTPUT_LOG2, + + localparam type data_t = logic [DATA_WIDTH-1:0], + localparam type sel_t = logic [SEL_WIDTH-1:0] +) +// }}} + +// Ports +// {{{ +( + input data_t data_i, + input sel_t sel_i, + output data_t [NOUTPUT-1:0] data_o +); +// }}} + + generate + always_comb + begin : demux_comb + for (int unsigned i = 0; i < NOUTPUT; i++) begin + if (!ONE_HOT_SEL) begin + data_o[i] = (sel_t'(i) == sel_i) ? data_i : '0; + end else begin + data_o[i] = sel_i[i] ? 
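+                // one-hot selector: forward the input only to the selected output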
data_i : '0; + end + end + end + endgenerate +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_fifo_reg.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_fifo_reg.sv new file mode 100644 index 00000000000..5e6eabf5ed1 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_fifo_reg.sv @@ -0,0 +1,144 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : FIFO buffer (using registers) + * Based on design of Ivan Miro-Panades + * History : + */ +module hpdcache_fifo_reg + // Parameters + // {{{ +#( + parameter int unsigned FIFO_DEPTH = 0, + parameter type fifo_data_t = logic +) + // }}} + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + input logic w_i, + output logic wok_o, + input fifo_data_t wdata_i, + input logic r_i, + output logic rok_o, + output fifo_data_t rdata_o +); + // }}} + + // Declaration of constants, types and functions + // {{{ + typedef logic unsigned [$clog2(FIFO_DEPTH)-1:0] fifo_addr_t; + // }}} + + // Declaration of internal wires and registers + // {{{ + fifo_data_t [FIFO_DEPTH-1:0] fifo_mem_q; + fifo_addr_t rptr_q, rptr_d; // read pointer + fifo_addr_t wptr_q, wptr_d; // write pointer + logic crossover_q, crossover_d; // write pointer has wrap + logic rexec, wexec; + logic rptr_max, wptr_max; + logic match_ptr; + // }}} + + // Global control signals + // {{{ + assign match_ptr = (wptr_q == rptr_q); + assign rok_o = match_ptr ? crossover_q : 1'b1; + assign wok_o = match_ptr ? ~crossover_q : 1'b1; + assign rexec = rok_o & r_i; + assign wexec = wok_o & w_i; + // }}} + + // Control of read and write pointers + // {{{ + assign rptr_max = (rptr_q == fifo_addr_t'(FIFO_DEPTH-1)); + assign wptr_max = (wptr_q == fifo_addr_t'(FIFO_DEPTH-1)); + + always_comb + begin : rptr_comb + if (rexec) begin + rptr_d = rptr_max ? 0 : rptr_q + 1; + end else begin + rptr_d = rptr_q; + end + end + + always_comb + begin : wptr_comb + if (wexec) begin + wptr_d = wptr_max ? 
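+                      // wrap the write pointer to slot 0 after the last FIFO entry, otherwise advance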
0 : wptr_q + 1; + end else begin + wptr_d = wptr_q; + end + end + + always_comb + begin : crossover_comb + if (rexec && rptr_max) begin + crossover_d = 1'b0; + end else if (wexec && wptr_max) begin + crossover_d = 1'b1; + end else begin + crossover_d = crossover_q; + end + end + // }}} + + // FIFO buffer memory management + // {{{ + always_ff @(posedge clk_i) + begin + if (wexec) fifo_mem_q[wptr_q] <= wdata_i; + end + + assign rdata_o = fifo_mem_q[rptr_q]; + // }}} + + // Setting of internal state + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + rptr_q <= 0; + wptr_q <= 0; + crossover_q <= 1'b0; + end else begin + rptr_q <= rptr_d; + wptr_q <= wptr_d; + crossover_q <= crossover_d; + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + rptr_ahead_wptr_assert: assert property (@(posedge clk_i) + ((rptr_q <= wptr_q) && !crossover_q) || ((rptr_q >= wptr_q) && crossover_q)) else + $error("fifo: read pointer is ahead of the write pointer"); + // pragma translate_on + // }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_fxarb.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_fxarb.sv new file mode 100644 index 00000000000..292eada7dbc --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_fxarb.sv @@ -0,0 +1,85 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Fixed-Priority Arbiter + * History : + */ +module hpdcache_fxarb + // Parameters + // {{{ +#( + // Number of requesters + parameter int unsigned N = 0 +) + // }}} + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + input logic [N-1:0] req_i, + output logic [N-1:0] gnt_o, + input logic ready_i +); + // }}} + + // Declaration of internal wires and registers + // {{{ + logic [N-1:0] gnt_q, gnt; + logic wait_q; + // }}} + + // Compute the grant vector + // {{{ + hpdcache_prio_1hot_encoder #(.N(N)) prio_msk_i (.val_i(req_i), .val_o(gnt)); + // }}} + + // Compute the output grant vector + // {{{ + assign gnt_o = wait_q ? 
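+    // while a granted request is still waiting for ready_i, hold the registered grant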
gnt_q : gnt; + // }}} + + // Setting of internal state + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + wait_q <= 1'b0; + gnt_q <= '0; + end else begin + wait_q <= ~ready_i & (wait_q | (|req_i)); + if (!ready_i && !wait_q && (|req_i)) begin + gnt_q <= gnt; + end + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + gnt_at_most_one_requester: assert property (@(posedge clk_i) + $onehot0(gnt_o)) else $error("arbiter: granting more than one requester"); + // pragma translate_on + // }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_mux.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_mux.sv new file mode 100644 index 00000000000..d78e1ebf313 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_mux.sv @@ -0,0 +1,79 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Author(s) : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Simple multiplexor + * History : + */ +module hpdcache_mux + // Parameters + // {{{ +#( + // Number of inputs + parameter int unsigned NINPUT = 0, + + // Width in bits of each input + parameter int unsigned DATA_WIDTH = 0, + + // Selector signal is one-hot encoded + parameter bit ONE_HOT_SEL = 0, + + // Compute the width of the selection signal + localparam int unsigned NINPUT_LOG2 = $clog2(NINPUT), + localparam int unsigned SEL_WIDTH = ONE_HOT_SEL ? NINPUT : NINPUT_LOG2, + + localparam type data_t = logic [DATA_WIDTH-1:0], + localparam type sel_t = logic [SEL_WIDTH-1:0] +) + // }}} + + // Ports + // {{{ +( + input data_t [NINPUT-1:0] data_i, + input sel_t sel_i, + output data_t data_o +); + // }}} + + generate + // Selector is one-hot encoded + if (ONE_HOT_SEL == 1) begin + always_comb + begin : data_out_mux_comb + data_o = '0; + for (int unsigned i = 0; i < NINPUT; i++) begin + data_o |= sel_i[i] ? data_i[i] : '0; + end + end + + // Selector is binary encoded + end else begin + always_comb + begin : data_out_mux_comb + data_o = '0; + for (int unsigned i = 0; i < NINPUT; i++) begin + data_o |= (i == int'(sel_i)) ? 
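+                // binary-encoded selector: OR in only the input whose index matches sel_i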
data_i[i] : '0; + end + end + end + endgenerate +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv new file mode 100644 index 00000000000..36fe5bcf737 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_prio_1hot_encoder.sv @@ -0,0 +1,43 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Priority One-hot Encoder + * History : + */ +module hpdcache_prio_1hot_encoder + // Parameters +#( + parameter int unsigned N = 0 +) + // Ports +( + input logic [N-1:0] val_i, + output logic [N-1:0] val_o +); + + generate + assign val_o[0] = val_i[0]; + for (genvar i = 1; i < int'(N); i++) begin : prio_gen + assign val_o[i] = val_i[i] & ~(|val_i[i-1:0]); + end + endgenerate +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv new file mode 100644 index 00000000000..184e6fbf40d --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_regbank_wbyteenable_1rw.sv @@ -0,0 +1,63 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : 1RW register bank with write byte enable + * History : + */ +module hpdcache_regbank_wbyteenable_1rw +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE/8-1:0] wbyteenable, + output logic [DATA_SIZE-1:0] rdata +); + + /* + * Internal memory array declaration + */ + typedef logic [DATA_SIZE-1:0] mem_t [DEPTH]; + mem_t mem; + + /* + * Process to update or read the memory array + */ + always_ff @(posedge clk) + begin : mem_update_ff + if (cs == 1'b1) begin + if (we == 1'b1) begin + for (int i = 0; i < DATA_SIZE/8; i++) begin + if (wbyteenable[i]) mem[addr][i*8 +: 8] <= wdata[i*8 +: 8]; + end + end + rdata <= mem[addr]; + end + end : mem_update_ff +endmodule : hpdcache_regbank_wbyteenable_1rw diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv new file mode 100644 index 00000000000..e185bc40494 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_regbank_wmask_1rw.sv @@ -0,0 +1,61 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : 1RW register bank with write bit mask + * History : + */ +module hpdcache_regbank_wmask_1rw +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE-1:0] wmask, + output logic [DATA_SIZE-1:0] rdata +); + + /* + * Internal memory array declaration + */ + typedef logic [DATA_SIZE-1:0] mem_t [DEPTH]; + mem_t mem; + + /* + * Process to update or read the memory array + */ + always_ff @(posedge clk) + begin : mem_update_ff + if (cs == 1'b1) begin + if (we == 1'b1) begin + mem[addr] <= (mem[addr] & ~wmask) | (wdata & wmask); + end + rdata <= mem[addr]; + end + end : mem_update_ff +endmodule : hpdcache_regbank_wmask_1rw diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_rrarb.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_rrarb.sv new file mode 100644 index 00000000000..d609cb3ade0 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_rrarb.sv @@ -0,0 +1,121 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/** + * Author(s) : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Round-Robin Arbiter + * Based on design from + * http://www.rtlery.com/articles/how-design-round-robin-arbiter + * History : + */ +module hpdcache_rrarb + // Parameters + // {{{ +#( + // Number of requesters + parameter int unsigned N = 0 +) + // }}} + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + input logic [N-1:0] req_i, + output logic [N-1:0] gnt_o, + input logic ready_i +); + // }}} + + // Declaration of internal wires and registers + // {{{ + logic [N-1:0] gnt_q, gnt; + logic [N-1:0] nxt; + logic wait_q; + logic [N-1:0] mask, gnt_msk, gnt_nomsk; + logic pending; + genvar gen_i; + // }}} + + // Elaboration-time assertions + // {{{ + // pragma translate_off + generate + if (N == 0) $error("N must be greater than 0"); + endgenerate + // pragma translate_on + // }}} + + // Compute the thermometer mask vector + // {{{ + generate + if (N > 1) begin : gen_nxt_gt_1 + assign nxt = {gnt_q[N-2:0], gnt_q[N-1]}; + end else begin : gen_nxt_1 + assign nxt = gnt_q[0]; + end + + for (gen_i = 0; gen_i < int'(N); gen_i++) begin : gen_mask + assign mask[gen_i] = |nxt[gen_i:0]; + end + endgenerate + // }}} + + // Compute the grant vector + // {{{ + hpdcache_prio_1hot_encoder #(.N(N)) prio_msk_i (.val_i(req_i & mask), .val_o(gnt_msk)); + hpdcache_prio_1hot_encoder #(.N(N)) prio_nomsk_i (.val_i(req_i) , .val_o(gnt_nomsk)); + assign gnt = |gnt_msk ? 
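+    // serve requesters at or after the round-robin pointer first; if none is pending, fall back to the unmasked priority result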
gnt_msk : gnt_nomsk; + // }}} + + // Compute the output grant vector + // {{{ + assign gnt_o = wait_q ? gnt_q : gnt; + // }}} + + // Setting of internal state + // {{{ + assign pending = |req_i; + + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + wait_q <= 1'b0; + gnt_q <= {1'b1, {N-1{1'b0}}}; + end else begin + wait_q <= ~ready_i & (wait_q | pending); + if (!wait_q && pending) begin + gnt_q <= gnt; + end + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + gnt_at_most_one_requester: assert property (@(posedge clk_i) + $onehot0(gnt)) else $error("arbiter: granting more than one requester"); + gnt_q_exactly_one_requester: assert property (@(posedge clk_i) + $onehot(gnt_q)) else $error("arbiter: grant state is not one-hot"); + // pragma translate_on + // }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_sram.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_sram.sv new file mode 100644 index 00000000000..d4cab7de79d --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_sram.sv @@ -0,0 +1,56 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : Wrapper for Behavioral SRAM macros + * History : + */ +module hpdcache_sram +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + output logic [DATA_SIZE-1:0] rdata +); + + hpdcache_sram_1rw #( + .ADDR_SIZE(ADDR_SIZE), + .DATA_SIZE(DATA_SIZE), + .DEPTH(DEPTH) + ) ram_i ( + .clk, + .rst_n, + .cs, + .we, + .addr, + .wdata, + .rdata + ); + +endmodule : hpdcache_sram diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv new file mode 100644 index 00000000000..43bdb4506f6 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_sram_wbyteenable.sv @@ -0,0 +1,58 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. 
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : Wrapper for 1RW SRAM macros implementing a write byte enable + * History : + */ +module hpdcache_sram_wbyteenable +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE/8-1:0] wbyteenable, + output logic [DATA_SIZE-1:0] rdata +); + + hpdcache_sram_wbyteenable_1rw #( + .ADDR_SIZE(ADDR_SIZE), + .DATA_SIZE(DATA_SIZE), + .DEPTH(DEPTH) + ) ram_i ( + .clk, + .rst_n, + .cs, + .we, + .addr, + .wdata, + .wbyteenable, + .rdata + ); + +endmodule : hpdcache_sram_wbyteenable diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_sram_wmask.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_sram_wmask.sv new file mode 100644 index 00000000000..a4771e3bd55 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/hpdcache_sram_wmask.sv @@ -0,0 +1,58 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : Wrapper for 1RW SRAM macros implementing write bit mask + * History : + */ +module hpdcache_sram_wmask +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE-1:0] wmask, + output logic [DATA_SIZE-1:0] rdata +); + + hpdcache_sram_wmask_1rw #( + .ADDR_SIZE(ADDR_SIZE), + .DATA_SIZE(DATA_SIZE), + .DEPTH(DEPTH) + ) ram_i ( + .clk, + .rst_n, + .cs, + .we, + .addr, + .wdata, + .wmask, + .rdata + ); + +endmodule : hpdcache_sram_wmask diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv new file mode 100644 index 00000000000..7288c731fdc --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/macros/behav/hpdcache_sram_1rw.sv @@ -0,0 +1,60 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : SRAM behavioral model + * History : + */ +module hpdcache_sram_1rw +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + output logic [DATA_SIZE-1:0] rdata +); + + /* + * Internal memory array declaration + */ + typedef logic [DATA_SIZE-1:0] mem_t [DEPTH]; + mem_t mem; + + /* + * Process to update or read the memory array + */ + always_ff @(posedge clk) + begin : mem_update_ff + if (cs == 1'b1) begin + if (we == 1'b1) begin + mem[addr] <= wdata; + end + rdata <= mem[addr]; + end + end : mem_update_ff +endmodule : hpdcache_sram_1rw diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv new file mode 100644 index 00000000000..0e5e2256ccf --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/macros/behav/hpdcache_sram_wbyteenable_1rw.sv @@ -0,0 +1,63 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. 
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : Behavioral model of a 1RW SRAM with write byte enable + * History : + */ +module hpdcache_sram_wbyteenable_1rw +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE/8-1:0] wbyteenable, + output logic [DATA_SIZE-1:0] rdata +); + + /* + * Internal memory array declaration + */ + typedef logic [DATA_SIZE-1:0] mem_t [DEPTH]; + mem_t mem; + + /* + * Process to update or read the memory array + */ + always_ff @(posedge clk) + begin : mem_update_ff + if (cs == 1'b1) begin + if (we == 1'b1) begin + for (int i = 0; i < DATA_SIZE/8; i++) begin + if (wbyteenable[i]) mem[addr][i*8 +: 8] <= wdata[i*8 +: 8]; + end + end + rdata <= mem[addr]; + end + end : mem_update_ff +endmodule : hpdcache_sram_wbyteenable_1rw diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv new file mode 100644 index 00000000000..5058ba28ba7 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/common/macros/behav/hpdcache_sram_wmask_1rw.sv @@ -0,0 +1,61 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : March, 2020 + * Description : Behavioral model of a 1RW SRAM with write bit mask + * History : + */ +module hpdcache_sram_wmask_1rw +#( + parameter int unsigned ADDR_SIZE = 0, + parameter int unsigned DATA_SIZE = 0, + parameter int unsigned DEPTH = 2**ADDR_SIZE +) +( + input logic clk, + input logic rst_n, + input logic cs, + input logic we, + input logic [ADDR_SIZE-1:0] addr, + input logic [DATA_SIZE-1:0] wdata, + input logic [DATA_SIZE-1:0] wmask, + output logic [DATA_SIZE-1:0] rdata +); + + /* + * Internal memory array declaration + */ + typedef logic [DATA_SIZE-1:0] mem_t [DEPTH]; + mem_t mem; + + /* + * Process to update or read the memory array + */ + always_ff @(posedge clk) + begin : mem_update_ff + if (cs == 1'b1) begin + if (we == 1'b1) begin + mem[addr] <= (mem[addr] & ~wmask) | (wdata & wmask); + end + rdata <= mem[addr]; + end + end : mem_update_ff +endmodule : hpdcache_sram_wmask_1rw diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache.sv new file mode 100644 index 00000000000..861e60c2c26 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache.sv @@ -0,0 +1,658 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache top + * History : + */ +module hpdcache +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int NREQUESTERS = 1, + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_r_t = logic, + parameter type hpdcache_mem_resp_w_t = logic +) + // }}} + + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Force the write buffer to send all pending writes + input logic wbuf_flush_i, + + // Core request interface + // 1st cycle + input logic core_req_valid_i [NREQUESTERS-1:0], + output logic core_req_ready_o [NREQUESTERS-1:0], + input hpdcache_req_t core_req_i [NREQUESTERS-1:0], + // 2nd cycle + input logic core_req_abort_i [NREQUESTERS-1:0], + input hpdcache_tag_t core_req_tag_i [NREQUESTERS-1:0], + input hpdcache_pma_t core_req_pma_i [NREQUESTERS-1:0], + + // Core response interface + output logic core_rsp_valid_o [NREQUESTERS-1:0], + output hpdcache_rsp_t core_rsp_o [NREQUESTERS-1:0], + + // Miss read interface + input logic mem_req_miss_read_ready_i, + output logic mem_req_miss_read_valid_o, + output hpdcache_mem_req_t mem_req_miss_read_o, + + output logic mem_resp_miss_read_ready_o, + input logic mem_resp_miss_read_valid_i, + input hpdcache_mem_resp_r_t mem_resp_miss_read_i, + + // Write-buffer write interface + input logic mem_req_wbuf_write_ready_i, + output logic mem_req_wbuf_write_valid_o, + output hpdcache_mem_req_t mem_req_wbuf_write_o, + + input logic mem_req_wbuf_write_data_ready_i, + output logic mem_req_wbuf_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_wbuf_write_data_o, + + output logic mem_resp_wbuf_write_ready_o, + input logic mem_resp_wbuf_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_wbuf_write_i, + + // Uncached read interface + input logic mem_req_uc_read_ready_i, + output logic mem_req_uc_read_valid_o, + output hpdcache_mem_req_t mem_req_uc_read_o, + + output logic mem_resp_uc_read_ready_o, + input logic mem_resp_uc_read_valid_i, + input hpdcache_mem_resp_r_t mem_resp_uc_read_i, + + // Uncached write interface + input logic mem_req_uc_write_ready_i, + output logic mem_req_uc_write_valid_o, + output hpdcache_mem_req_t mem_req_uc_write_o, + + input logic mem_req_uc_write_data_ready_i, + output logic mem_req_uc_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_uc_write_data_o, + + output logic mem_resp_uc_write_ready_o, + input logic mem_resp_uc_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_uc_write_i, + + // Performance events + output logic evt_cache_write_miss_o, + output logic evt_cache_read_miss_o, + output logic evt_uncached_req_o, + output logic evt_cmo_req_o, + output logic evt_write_req_o, + output logic evt_read_req_o, + output logic evt_prefetch_req_o, + output logic evt_req_on_hold_o, + output logic evt_rtab_rollback_o, + output logic evt_stall_refill_o, + output logic evt_stall_o, + + // Status interface + output logic wbuf_empty_o, + + // Configuration interface + input logic cfg_enable_i, + input wbuf_timecnt_t cfg_wbuf_threshold_i, + input logic cfg_wbuf_reset_timecnt_on_write_i, + input logic cfg_wbuf_sequential_waw_i, + input logic cfg_wbuf_inhibit_write_coalescing_i, + input logic cfg_prefetch_updt_plru_i, + input logic cfg_error_on_cacheable_amo_i, + input logic cfg_rtab_single_entry_i +); + + // }}} + + // Declaration 
of internal signals + // {{{ + logic refill_req_valid; + logic refill_req_ready; + logic refill_busy; + logic refill_updt_plru; + hpdcache_set_t refill_set; + hpdcache_dir_entry_t refill_dir_entry; + hpdcache_way_vector_t refill_read_victim_way; + hpdcache_way_vector_t refill_write_victim_way; + logic refill_write_dir; + logic refill_write_data; + hpdcache_word_t refill_word; + hpdcache_refill_data_t refill_data; + logic refill_core_rsp_valid; + hpdcache_rsp_t refill_core_rsp; + hpdcache_nline_t refill_nline; + logic refill_updt_rtab; + + logic miss_mshr_empty; + logic miss_mshr_check; + mshr_set_t miss_mshr_check_set; + mshr_tag_t miss_mshr_check_tag; + logic miss_mshr_hit; + logic miss_mshr_alloc_cs; + logic miss_mshr_alloc; + logic miss_mshr_alloc_ready; + logic miss_mshr_alloc_full; + hpdcache_nline_t miss_mshr_alloc_nline; + hpdcache_req_tid_t miss_mshr_alloc_tid; + hpdcache_req_sid_t miss_mshr_alloc_sid; + hpdcache_word_t miss_mshr_alloc_word; + logic miss_mshr_alloc_need_rsp; + logic miss_mshr_alloc_is_prefetch; + + logic wbuf_flush_all; + logic wbuf_write; + logic wbuf_write_ready; + wbuf_addr_t wbuf_write_addr; + wbuf_data_t wbuf_write_data; + wbuf_be_t wbuf_write_be; + logic wbuf_write_uncacheable; + logic wbuf_read_hit; + logic wbuf_read_flush_hit; + hpdcache_req_addr_t wbuf_rtab_addr; + logic wbuf_rtab_is_read; + logic wbuf_rtab_hit_open; + logic wbuf_rtab_hit_pend; + logic wbuf_rtab_hit_sent; + logic wbuf_rtab_not_ready; + + logic uc_ready; + logic uc_req_valid; + hpdcache_uc_op_t uc_req_op; + hpdcache_req_addr_t uc_req_addr; + hpdcache_req_size_t uc_req_size; + hpdcache_req_data_t uc_req_data; + hpdcache_req_be_t uc_req_be; + logic uc_req_uncacheable; + hpdcache_req_sid_t uc_req_sid; + hpdcache_req_tid_t uc_req_tid; + logic uc_req_need_rsp; + logic uc_wbuf_flush_all; + logic uc_dir_amo_match; + hpdcache_set_t uc_dir_amo_match_set; + hpdcache_tag_t uc_dir_amo_match_tag; + logic uc_dir_amo_update_plru; + hpdcache_way_vector_t uc_dir_amo_hit_way; + logic uc_data_amo_write; + logic uc_data_amo_write_enable; + hpdcache_set_t uc_data_amo_write_set; + hpdcache_req_size_t uc_data_amo_write_size; + hpdcache_word_t uc_data_amo_write_word; + logic [63:0] uc_data_amo_write_data; + logic [7:0] uc_data_amo_write_be; + logic uc_lrsc_snoop; + hpdcache_req_addr_t uc_lrsc_snoop_addr; + hpdcache_req_size_t uc_lrsc_snoop_size; + logic uc_core_rsp_ready; + logic uc_core_rsp_valid; + hpdcache_rsp_t uc_core_rsp; + + logic cmo_req_valid; + logic cmo_ready; + hpdcache_cmoh_op_t cmo_req_op; + hpdcache_req_addr_t cmo_req_addr; + hpdcache_req_data_t cmo_req_wdata; + logic cmo_wbuf_flush_all; + logic cmo_dir_check; + hpdcache_set_t cmo_dir_check_set; + hpdcache_tag_t cmo_dir_check_tag; + hpdcache_way_vector_t cmo_dir_check_hit_way; + logic cmo_dir_inval; + hpdcache_set_t cmo_dir_inval_set; + hpdcache_way_vector_t cmo_dir_inval_way; + + logic rtab_empty; + logic ctrl_empty; + + logic core_rsp_valid; + hpdcache_rsp_t core_rsp; + + logic arb_req_valid; + logic arb_req_ready; + hpdcache_req_t arb_req; + logic arb_abort; + hpdcache_tag_t arb_tag; + hpdcache_pma_t arb_pma; + + localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_READ_ID = {HPDcacheMemIdWidth{1'b1}}; + localparam logic [HPDcacheMemIdWidth-1:0] HPDCACHE_UC_WRITE_ID = {HPDcacheMemIdWidth{1'b1}}; + // }}} + + // Requesters arbiter + // {{{ + hpdcache_core_arbiter #( + .NREQUESTERS (NREQUESTERS) + ) core_req_arbiter_i ( + .clk_i, + .rst_ni, + + .core_req_valid_i, + .core_req_ready_o, + .core_req_i, + .core_req_abort_i, + .core_req_tag_i, + 
.core_req_pma_i, + + .core_rsp_valid_i (core_rsp_valid), + .core_rsp_i (core_rsp), + .core_rsp_valid_o, + .core_rsp_o, + + .arb_req_valid_o (arb_req_valid), + .arb_req_ready_i (arb_req_ready), + .arb_req_o (arb_req), + .arb_abort_o (arb_abort), + .arb_tag_o (arb_tag), + .arb_pma_o (arb_pma) + ); + // }}} + + // HPDcache controller + // {{{ + hpdcache_ctrl hpdcache_ctrl_i( + .clk_i, + .rst_ni, + + .core_req_valid_i (arb_req_valid), + .core_req_ready_o (arb_req_ready), + .core_req_i (arb_req), + .core_req_abort_i (arb_abort), + .core_req_tag_i (arb_tag), + .core_req_pma_i (arb_pma), + + .core_rsp_valid_o (core_rsp_valid), + .core_rsp_o (core_rsp), + + .wbuf_flush_i, + + .cachedir_hit_o (/* unused */), + + .miss_mshr_check_o (miss_mshr_check), + .miss_mshr_check_set_o (miss_mshr_check_set), + .miss_mshr_check_tag_o (miss_mshr_check_tag), + .miss_mshr_alloc_o (miss_mshr_alloc), + .miss_mshr_alloc_cs_o (miss_mshr_alloc_cs), + .miss_mshr_alloc_ready_i (miss_mshr_alloc_ready), + .miss_mshr_alloc_full_i (miss_mshr_alloc_full), + .miss_mshr_alloc_nline_o (miss_mshr_alloc_nline), + .miss_mshr_alloc_tid_o (miss_mshr_alloc_tid), + .miss_mshr_alloc_sid_o (miss_mshr_alloc_sid), + .miss_mshr_alloc_word_o (miss_mshr_alloc_word), + .miss_mshr_alloc_need_rsp_o (miss_mshr_alloc_need_rsp), + .miss_mshr_alloc_is_prefetch_o (miss_mshr_alloc_is_prefetch), + .miss_mshr_hit_i (miss_mshr_hit), + + .refill_req_valid_i (refill_req_valid), + .refill_req_ready_o (refill_req_ready), + .refill_busy_i (refill_busy), + .refill_updt_plru_i (refill_updt_plru), + .refill_set_i (refill_set), + .refill_dir_entry_i (refill_dir_entry), + .refill_victim_way_o (refill_read_victim_way), + .refill_victim_way_i (refill_write_victim_way), + .refill_write_dir_i (refill_write_dir), + .refill_write_data_i (refill_write_data), + .refill_word_i (refill_word), + .refill_data_i (refill_data), + .refill_core_rsp_valid_i (refill_core_rsp_valid), + .refill_core_rsp_i (refill_core_rsp), + .refill_nline_i (refill_nline), + .refill_updt_rtab_i (refill_updt_rtab), + + .wbuf_empty_i (wbuf_empty_o), + .wbuf_flush_all_o (wbuf_flush_all), + .wbuf_write_o (wbuf_write), + .wbuf_write_ready_i (wbuf_write_ready), + .wbuf_write_addr_o (wbuf_write_addr), + .wbuf_write_data_o (wbuf_write_data), + .wbuf_write_be_o (wbuf_write_be), + .wbuf_write_uncacheable_o (wbuf_write_uncacheable), + .wbuf_read_hit_i (wbuf_read_hit), + .wbuf_read_flush_hit_o (wbuf_read_flush_hit), + .wbuf_rtab_addr_o (wbuf_rtab_addr), + .wbuf_rtab_is_read_o (wbuf_rtab_is_read), + .wbuf_rtab_hit_open_i (wbuf_rtab_hit_open), + .wbuf_rtab_hit_pend_i (wbuf_rtab_hit_pend), + .wbuf_rtab_hit_sent_i (wbuf_rtab_hit_sent), + .wbuf_rtab_not_ready_i (wbuf_rtab_not_ready), + + .uc_busy_i (~uc_ready), + .uc_lrsc_snoop_o (uc_lrsc_snoop), + .uc_lrsc_snoop_addr_o (uc_lrsc_snoop_addr), + .uc_lrsc_snoop_size_o (uc_lrsc_snoop_size), + .uc_req_valid_o (uc_req_valid), + .uc_req_op_o (uc_req_op), + .uc_req_addr_o (uc_req_addr), + .uc_req_size_o (uc_req_size), + .uc_req_data_o (uc_req_data), + .uc_req_be_o (uc_req_be), + .uc_req_uc_o (uc_req_uncacheable), + .uc_req_sid_o (uc_req_sid), + .uc_req_tid_o (uc_req_tid), + .uc_req_need_rsp_o (uc_req_need_rsp), + .uc_wbuf_flush_all_i (uc_wbuf_flush_all), + .uc_dir_amo_match_i (uc_dir_amo_match), + .uc_dir_amo_match_set_i (uc_dir_amo_match_set), + .uc_dir_amo_match_tag_i (uc_dir_amo_match_tag), + .uc_dir_amo_update_plru_i (uc_dir_amo_update_plru), + .uc_dir_amo_hit_way_o (uc_dir_amo_hit_way), + .uc_data_amo_write_i (uc_data_amo_write), + .uc_data_amo_write_enable_i 
(uc_data_amo_write_enable), + .uc_data_amo_write_set_i (uc_data_amo_write_set), + .uc_data_amo_write_size_i (uc_data_amo_write_size), + .uc_data_amo_write_word_i (uc_data_amo_write_word), + .uc_data_amo_write_data_i (uc_data_amo_write_data), + .uc_data_amo_write_be_i (uc_data_amo_write_be), + .uc_core_rsp_ready_o (uc_core_rsp_ready), + .uc_core_rsp_valid_i (uc_core_rsp_valid), + .uc_core_rsp_i (uc_core_rsp), + + .cmo_busy_i (~cmo_ready), + .cmo_req_valid_o (cmo_req_valid), + .cmo_req_op_o (cmo_req_op), + .cmo_req_addr_o (cmo_req_addr), + .cmo_req_wdata_o (cmo_req_wdata), + .cmo_wbuf_flush_all_i (cmo_wbuf_flush_all), + .cmo_dir_check_i (cmo_dir_check), + .cmo_dir_check_set_i (cmo_dir_check_set), + .cmo_dir_check_tag_i (cmo_dir_check_tag), + .cmo_dir_check_hit_way_o (cmo_dir_check_hit_way), + .cmo_dir_inval_i (cmo_dir_inval), + .cmo_dir_inval_set_i (cmo_dir_inval_set), + .cmo_dir_inval_way_i (cmo_dir_inval_way), + + .rtab_empty_o (rtab_empty), + .ctrl_empty_o (ctrl_empty), + + .cfg_enable_i, + .cfg_rtab_single_entry_i, + + .evt_cache_write_miss_o, + .evt_cache_read_miss_o, + .evt_uncached_req_o, + .evt_cmo_req_o, + .evt_write_req_o, + .evt_read_req_o, + .evt_prefetch_req_o, + .evt_req_on_hold_o, + .evt_rtab_rollback_o, + .evt_stall_refill_o, + .evt_stall_o + ); + // }}} + + // HPDcache write-buffer + // {{{ + hpdcache_wbuf_wrapper #( + .HPDcacheMemIdWidth (HPDcacheMemIdWidth), + .HPDcacheMemDataWidth (HPDcacheMemDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t) + ) hpdcache_wbuf_i( + .clk_i, + .rst_ni, + + .empty_o (wbuf_empty_o), + .full_o (/* unused */), + .flush_all_i (wbuf_flush_all), + + .cfg_threshold_i (cfg_wbuf_threshold_i), + .cfg_reset_timecnt_on_write_i (cfg_wbuf_reset_timecnt_on_write_i), + .cfg_sequential_waw_i (cfg_wbuf_sequential_waw_i), + .cfg_inhibit_write_coalescing_i (cfg_wbuf_inhibit_write_coalescing_i), + + .write_i (wbuf_write), + .write_ready_o (wbuf_write_ready), + .write_addr_i (wbuf_write_addr), + .write_data_i (wbuf_write_data), + .write_be_i (wbuf_write_be), + .write_uc_i (wbuf_write_uncacheable), + + .read_addr_i (wbuf_write_addr), + .read_hit_o (wbuf_read_hit), + .read_flush_hit_i (wbuf_read_flush_hit), + + .replay_addr_i (wbuf_rtab_addr), + .replay_is_read_i (wbuf_rtab_is_read), + .replay_open_hit_o (wbuf_rtab_hit_open), + .replay_pend_hit_o (wbuf_rtab_hit_pend), + .replay_sent_hit_o (wbuf_rtab_hit_sent), + .replay_not_ready_o (wbuf_rtab_not_ready), + + .mem_req_write_ready_i (mem_req_wbuf_write_ready_i), + .mem_req_write_valid_o (mem_req_wbuf_write_valid_o), + .mem_req_write_o (mem_req_wbuf_write_o), + + .mem_req_write_data_ready_i (mem_req_wbuf_write_data_ready_i), + .mem_req_write_data_valid_o (mem_req_wbuf_write_data_valid_o), + .mem_req_write_data_o (mem_req_wbuf_write_data_o), + + .mem_resp_write_ready_o (mem_resp_wbuf_write_ready_o), + .mem_resp_write_valid_i (mem_resp_wbuf_write_valid_i), + .mem_resp_write_i (mem_resp_wbuf_write_i) + ); + // }}} + + // Miss handler + // {{{ + hpdcache_miss_handler #( + .HPDcacheMemIdWidth (HPDcacheMemIdWidth), + .HPDcacheMemDataWidth (HPDcacheMemDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t) + ) hpdcache_miss_handler_i( + .clk_i, + .rst_ni, + + .mshr_empty_o (miss_mshr_empty), + .mshr_full_o (/* unused */), + + .cfg_prefetch_updt_plru_i, + + .mshr_check_i (miss_mshr_check), + .mshr_check_set_i (miss_mshr_check_set), + .mshr_check_tag_i (miss_mshr_check_tag), + 
.mshr_check_hit_o (miss_mshr_hit), + + .mshr_alloc_ready_o (miss_mshr_alloc_ready), + .mshr_alloc_i (miss_mshr_alloc), + .mshr_alloc_cs_i (miss_mshr_alloc_cs), + .mshr_alloc_full_o (miss_mshr_alloc_full), + .mshr_alloc_nline_i (miss_mshr_alloc_nline), + .mshr_alloc_tid_i (miss_mshr_alloc_tid), + .mshr_alloc_sid_i (miss_mshr_alloc_sid), + .mshr_alloc_word_i (miss_mshr_alloc_word), + .mshr_alloc_need_rsp_i (miss_mshr_alloc_need_rsp), + .mshr_alloc_is_prefetch_i (miss_mshr_alloc_is_prefetch), + + .refill_req_ready_i (refill_req_ready), + .refill_req_valid_o (refill_req_valid), + .refill_busy_o (refill_busy), + .refill_updt_plru_o (refill_updt_plru), + .refill_set_o (refill_set), + .refill_dir_entry_o (refill_dir_entry), + .refill_victim_way_i (refill_read_victim_way), + .refill_write_dir_o (refill_write_dir), + .refill_write_data_o (refill_write_data), + .refill_victim_way_o (refill_write_victim_way), + .refill_data_o (refill_data), + .refill_word_o (refill_word), + .refill_nline_o (refill_nline), + .refill_updt_rtab_o (refill_updt_rtab), + + .refill_core_rsp_valid_o (refill_core_rsp_valid), + .refill_core_rsp_o (refill_core_rsp), + + .mem_req_ready_i (mem_req_miss_read_ready_i), + .mem_req_valid_o (mem_req_miss_read_valid_o), + .mem_req_o (mem_req_miss_read_o), + + .mem_resp_ready_o (mem_resp_miss_read_ready_o), + .mem_resp_valid_i (mem_resp_miss_read_valid_i), + .mem_resp_i (mem_resp_miss_read_i) + ); + // }}} + + // Uncacheable request handler + // {{{ + hpdcache_uncached #( + .HPDcacheMemIdWidth (HPDcacheMemIdWidth), + .HPDcacheMemDataWidth (HPDcacheMemDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t), + .hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t) + ) hpdcache_uc_i( + .clk_i, + .rst_ni, + + .wbuf_empty_i (wbuf_empty_o), + .mshr_empty_i (miss_mshr_empty), + .rtab_empty_i (rtab_empty), + .ctrl_empty_i (ctrl_empty), + + .req_valid_i (uc_req_valid), + .req_ready_o (uc_ready), + .req_op_i (uc_req_op), + .req_addr_i (uc_req_addr), + .req_size_i (uc_req_size), + .req_data_i (uc_req_data), + .req_be_i (uc_req_be), + .req_uc_i (uc_req_uncacheable), + .req_sid_i (uc_req_sid), + .req_tid_i (uc_req_tid), + .req_need_rsp_i (uc_req_need_rsp), + + .wbuf_flush_all_o (uc_wbuf_flush_all), + + .dir_amo_match_o (uc_dir_amo_match), + .dir_amo_match_set_o (uc_dir_amo_match_set), + .dir_amo_match_tag_o (uc_dir_amo_match_tag), + .dir_amo_update_plru_o (uc_dir_amo_update_plru), + .dir_amo_hit_way_i (uc_dir_amo_hit_way), + + .data_amo_write_o (uc_data_amo_write), + .data_amo_write_enable_o (uc_data_amo_write_enable), + .data_amo_write_set_o (uc_data_amo_write_set), + .data_amo_write_size_o (uc_data_amo_write_size), + .data_amo_write_word_o (uc_data_amo_write_word), + .data_amo_write_data_o (uc_data_amo_write_data), + .data_amo_write_be_o (uc_data_amo_write_be), + + .lrsc_snoop_i (uc_lrsc_snoop), + .lrsc_snoop_addr_i (uc_lrsc_snoop_addr), + .lrsc_snoop_size_i (uc_lrsc_snoop_size), + + .core_rsp_ready_i (uc_core_rsp_ready), + .core_rsp_valid_o (uc_core_rsp_valid), + .core_rsp_o (uc_core_rsp), + + .mem_read_id_i (HPDCACHE_UC_READ_ID), + .mem_write_id_i (HPDCACHE_UC_WRITE_ID), + + .mem_req_read_ready_i (mem_req_uc_read_ready_i), + .mem_req_read_valid_o (mem_req_uc_read_valid_o), + .mem_req_read_o (mem_req_uc_read_o), + + .mem_resp_read_ready_o (mem_resp_uc_read_ready_o), + .mem_resp_read_valid_i (mem_resp_uc_read_valid_i), + .mem_resp_read_i (mem_resp_uc_read_i), + + .mem_req_write_ready_i 
(mem_req_uc_write_ready_i), + .mem_req_write_valid_o (mem_req_uc_write_valid_o), + .mem_req_write_o (mem_req_uc_write_o), + + .mem_req_write_data_ready_i (mem_req_uc_write_data_ready_i), + .mem_req_write_data_valid_o (mem_req_uc_write_data_valid_o), + .mem_req_write_data_o (mem_req_uc_write_data_o), + + .mem_resp_write_ready_o (mem_resp_uc_write_ready_o), + .mem_resp_write_valid_i (mem_resp_uc_write_valid_i), + .mem_resp_write_i (mem_resp_uc_write_i), + + .cfg_error_on_cacheable_amo_i + ); + + // CMO Request Handler + // {{{ + hpdcache_cmo hpdcache_cmo_i( + .clk_i, + .rst_ni, + + .wbuf_empty_i (wbuf_empty_o), + .mshr_empty_i (miss_mshr_empty), + .rtab_empty_i (rtab_empty), + .ctrl_empty_i (ctrl_empty), + + .req_valid_i (cmo_req_valid), + .req_ready_o (cmo_ready), + .req_op_i (cmo_req_op), + .req_addr_i (cmo_req_addr), + .req_wdata_i (cmo_req_wdata), + + .wbuf_flush_all_o (cmo_wbuf_flush_all), + + .dir_check_o (cmo_dir_check), + .dir_check_set_o (cmo_dir_check_set), + .dir_check_tag_o (cmo_dir_check_tag), + .dir_check_hit_way_i (cmo_dir_check_hit_way), + + .dir_inval_o (cmo_dir_inval), + .dir_inval_set_o (cmo_dir_inval_set), + .dir_inval_way_o (cmo_dir_inval_way) + ); + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial begin + req_access_width_assert: + assert (HPDCACHE_REQ_WORDS <= HPDCACHE_ACCESS_WORDS) else + $error("req data width shall be l.e. to cache access width"); + refill_access_width_assert: + assert (HPDCACHE_CL_WORDS >= HPDCACHE_ACCESS_WORDS) else + $error("cache access width shall be l.e. to cache-line width"); + miss_mem_id_width_assert: + assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_WAY_WIDTH + HPDCACHE_MSHR_SET_WIDTH)) else + $error("insufficient ID bits on the mem interface to transport misses"); + wbuf_mem_id_width_assert: + assert (HPDcacheMemIdWidth >= HPDCACHE_WBUF_DIR_PTR_WIDTH) else + $error("insufficient ID bits on the mem interface to transport writes"); + + end + // pragma translate_on + // }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache.vlt b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache.vlt new file mode 100644 index 00000000000..2a35859eecc --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache.vlt @@ -0,0 +1,29 @@ +`verilator_config +// +// Copyright 2023 CEA* +// *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you +// may not use this file except in compliance with the License, or, at your +// option, the Apache License version 2.0. You may obtain a copy of the +// License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an “AS IS” BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. 
+// + +// +// Authors Cesar Fuguet +// Creation Date April, 2021 +// Description Verilator's configuration file +// History +// +lint_off -rule PINCONNECTEMPTY +lint_off -rule DECLFILENAME diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_amo.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_amo.sv new file mode 100644 index 00000000000..d233af1d83e --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_amo.sv @@ -0,0 +1,67 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : May, 2021 + * Description : HPDcache AMO computing unit + * History : + */ +module hpdcache_amo +import hpdcache_pkg::*; +// Ports +// {{{ +( + input logic [63:0] ld_data_i, + input logic [63:0] st_data_i, + input hpdcache_uc_op_t op_i, + output logic [63:0] result_o +); +// }}} + + logic signed [63:0] ld_data; + logic signed [63:0] st_data; + logic signed [63:0] sum; + logic ugt, sgt; + + assign ld_data = ld_data_i, + st_data = st_data_i; + + assign ugt = (ld_data_i > st_data_i), + sgt = (ld_data > st_data), + sum = ld_data + st_data; + + always_comb + begin : amo_compute_comb + unique case (1'b1) + op_i.is_amo_lr : result_o = ld_data_i; + op_i.is_amo_sc : result_o = st_data_i; + op_i.is_amo_swap : result_o = st_data_i; + op_i.is_amo_add : result_o = sum; + op_i.is_amo_and : result_o = ld_data_i & st_data_i; + op_i.is_amo_or : result_o = ld_data_i | st_data_i; + op_i.is_amo_xor : result_o = ld_data_i ^ st_data_i; + op_i.is_amo_max : result_o = sgt ? ld_data_i : st_data_i; + op_i.is_amo_maxu : result_o = ugt ? ld_data_i : st_data_i; + op_i.is_amo_min : result_o = sgt ? st_data_i : ld_data_i; + op_i.is_amo_minu : result_o = ugt ? st_data_i : ld_data_i; + default : result_o = '0; + endcase + end +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_cmo.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_cmo.sv new file mode 100644 index 00000000000..8302d0a9d7a --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_cmo.sv @@ -0,0 +1,250 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : July, 2021 + * Description : HPDcache Cache-Management-Operation Handler + * History : + */ +module hpdcache_cmo +import hpdcache_pkg::*; +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Global control signals + // {{{ + input logic wbuf_empty_i, + input logic mshr_empty_i, + input logic rtab_empty_i, + input logic ctrl_empty_i, + // }}} + + // Request interface + // {{{ + input logic req_valid_i, + output logic req_ready_o, + input hpdcache_cmoh_op_t req_op_i, + input hpdcache_req_addr_t req_addr_i, + input hpdcache_req_data_t req_wdata_i, + // }}} + + // Write Buffer Interface + // {{{ + output logic wbuf_flush_all_o, + // }}} + + // Cache Directory Interface + // {{{ + output logic dir_check_o, + output hpdcache_set_t dir_check_set_o, + output hpdcache_tag_t dir_check_tag_o, + input hpdcache_way_vector_t dir_check_hit_way_i, + + output logic dir_inval_o, + output hpdcache_set_t dir_inval_set_o, + output hpdcache_way_vector_t dir_inval_way_o + // }}} +); +// }}} + +// Definition of constants and types +// {{{ + typedef enum { + CMOH_IDLE, + CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY, + CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY, + CMOH_INVAL_CHECK_NLINE, + CMOH_INVAL_SET + } hpdcache_cmoh_fsm_t; +// }}} + +// Internal signals and registers +// {{{ + hpdcache_cmoh_fsm_t cmoh_fsm_q, cmoh_fsm_d; + hpdcache_cmoh_op_t cmoh_op_q, cmoh_op_d; + hpdcache_req_addr_t cmoh_addr_q, cmoh_addr_d; + hpdcache_way_vector_t cmoh_way_q, cmoh_way_d; + hpdcache_set_t cmoh_set_cnt_q, cmoh_set_cnt_d; + hpdcache_nline_t cmoh_nline_q; + hpdcache_tag_t cmoh_tag_q; + hpdcache_set_t cmoh_set_q; + hpdcache_data_word_t cmoh_wdata; +// }}} + +// CMO request handler FSM +// {{{ + assign cmoh_nline_q = cmoh_addr_q[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH], + cmoh_set_q = cmoh_nline_q[0 +: HPDCACHE_SET_WIDTH], + cmoh_tag_q = cmoh_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH]; + + assign dir_check_set_o = cmoh_set_q, + dir_check_tag_o = cmoh_tag_q; + + assign req_ready_o = (cmoh_fsm_q == CMOH_IDLE); + + // Only the least significant word of the write data contains parameters + // for the CMO handler + assign cmoh_wdata = req_wdata_i[0]; + + always_comb + begin : cmoh_fsm_comb + cmoh_op_d = cmoh_op_q; + cmoh_addr_d = cmoh_addr_q; + cmoh_way_d = cmoh_way_q; + cmoh_set_cnt_d = cmoh_set_cnt_q; + + dir_check_o = 1'b0; + + dir_inval_o = 1'b0; + dir_inval_set_o = cmoh_set_q; + dir_inval_way_o = '0; + + wbuf_flush_all_o = 1'b0; + + cmoh_fsm_d = cmoh_fsm_q; + + case (cmoh_fsm_q) + CMOH_IDLE: begin + cmoh_fsm_d = CMOH_IDLE; + + if (req_valid_i) begin + unique case (1'b1) + req_op_i.is_fence: begin + // request to the write buffer to send all open entries + wbuf_flush_all_o = rtab_empty_i; + + // then wait for the write buffer to be empty + if (!rtab_empty_i || !wbuf_empty_i) begin + cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY; + end + end + req_op_i.is_inval_by_nline, + req_op_i.is_inval_by_set, + req_op_i.is_inval_all: begin + cmoh_op_d = req_op_i; + cmoh_addr_d = req_addr_i; + cmoh_way_d = cmoh_wdata[0 +: HPDCACHE_WAYS]; + cmoh_set_cnt_d = 0; + if (mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + if (req_op_i.is_inval_by_nline) begin + cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE; + end else begin + cmoh_fsm_d = CMOH_INVAL_SET; + end + end else begin + cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY; + end + end + default: begin + // pragma translate_off + $error("cmo 
handler: unexpected operation"); + // pragma translate_on + end + endcase + end + end + CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY: begin + wbuf_flush_all_o = rtab_empty_i; + + if (wbuf_empty_i && rtab_empty_i) begin + cmoh_fsm_d = CMOH_IDLE; + end else begin + cmoh_fsm_d = CMOH_FENCE_WAIT_WBUF_RTAB_EMPTY; + end + end + CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY: begin + cmoh_fsm_d = CMOH_INVAL_WAIT_MSHR_RTAB_EMPTY; + if (mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + if (cmoh_op_q.is_inval_by_nline) begin + cmoh_fsm_d = CMOH_INVAL_CHECK_NLINE; + end else begin + cmoh_fsm_d = CMOH_INVAL_SET; + end + end + end + CMOH_INVAL_CHECK_NLINE: begin + dir_check_o = 1'b1; + cmoh_fsm_d = CMOH_INVAL_SET; + end + CMOH_INVAL_SET: begin + cmoh_fsm_d = CMOH_INVAL_SET; + case (1'b1) + cmoh_op_q.is_inval_by_nline: begin + dir_inval_o = |dir_check_hit_way_i; + dir_inval_way_o = dir_check_hit_way_i; + cmoh_fsm_d = CMOH_IDLE; + end + cmoh_op_q.is_inval_all: begin + dir_inval_o = 1'b1; + dir_inval_way_o = {HPDCACHE_WAYS{1'b1}}; + dir_inval_set_o = cmoh_set_cnt_q; + cmoh_set_cnt_d = cmoh_set_cnt_q + 1; + if (cmoh_set_cnt_q == hpdcache_set_t'(HPDCACHE_SETS - 1)) begin + cmoh_fsm_d = CMOH_IDLE; + end + end + cmoh_op_q.is_inval_by_set: begin + dir_inval_o = 1'b1; + dir_inval_way_o = cmoh_way_q; + cmoh_fsm_d = CMOH_IDLE; + end + endcase + end + endcase + end +// }}} + +// CMO request handler set state +// {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + cmoh_fsm_q <= CMOH_IDLE; + end else begin + cmoh_fsm_q <= cmoh_fsm_d; + end + end + + always_ff @(posedge clk_i) + begin + cmoh_op_q <= cmoh_op_d; + cmoh_addr_q <= cmoh_addr_d; + cmoh_way_q <= cmoh_way_d; + cmoh_set_cnt_q <= cmoh_set_cnt_d; + end +// }}} + +// Assertions +// {{{ +// pragma translate_off + assert property (@(posedge clk_i) + req_valid_i -> $onehot(req_op_i)) else + $error("cmo_handler: more than one operation type requested"); + + assert property (@(posedge clk_i) + req_valid_i -> (cmoh_fsm_q == CMOH_IDLE)) else + $error("cmo_handler: new request received while busy"); +// pragma translate_on +// }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_core_arbiter.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_core_arbiter.sv new file mode 100644 index 00000000000..1f8f5a475c3 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_core_arbiter.sv @@ -0,0 +1,171 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : September, 2023 + * Description : HPDcache request arbiter + * History : + */ +module hpdcache_core_arbiter +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int NREQUESTERS = 1 +) + // }}} + + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Core request interface + // 1st cycle + input logic core_req_valid_i [NREQUESTERS-1:0], + output logic core_req_ready_o [NREQUESTERS-1:0], + input hpdcache_req_t core_req_i [NREQUESTERS-1:0], + // 2nd cycle + input logic core_req_abort_i [NREQUESTERS-1:0], + input hpdcache_tag_t core_req_tag_i [NREQUESTERS-1:0], + input hpdcache_pma_t core_req_pma_i [NREQUESTERS-1:0], + + // Core response interface + input logic core_rsp_valid_i, + input hpdcache_rsp_t core_rsp_i, + output logic core_rsp_valid_o [NREQUESTERS-1:0], + output hpdcache_rsp_t core_rsp_o [NREQUESTERS-1:0], + + // Granted request + output logic arb_req_valid_o, + input logic arb_req_ready_i, + output hpdcache_req_t arb_req_o, + output logic arb_abort_o, + output hpdcache_tag_t arb_tag_o, + output hpdcache_pma_t arb_pma_o +); + + // }}} + + // Declaration of internal signals + // {{{ + logic [NREQUESTERS-1:0] core_req_valid; + hpdcache_req_t [NREQUESTERS-1:0] core_req; + logic [NREQUESTERS-1:0] core_req_abort; + hpdcache_tag_t [NREQUESTERS-1:0] core_req_tag; + hpdcache_pma_t [NREQUESTERS-1:0] core_req_pma; + + logic [NREQUESTERS-1:0] arb_req_gnt_q, arb_req_gnt_d; + // }}} + + // Requesters arbiter + // {{{ + // Pack request ports + genvar gen_i; + + generate + for (gen_i = 0; gen_i < int'(NREQUESTERS); gen_i++) begin : gen_core_req + assign core_req_ready_o[gen_i] = arb_req_gnt_d[gen_i] & arb_req_ready_i, + core_req_valid[gen_i] = core_req_valid_i[gen_i], + core_req[gen_i] = core_req_i[gen_i]; + + assign core_req_abort[gen_i] = core_req_abort_i[gen_i], + core_req_tag[gen_i] = core_req_tag_i[gen_i], + core_req_pma[gen_i] = core_req_pma_i[gen_i]; + end + endgenerate + + // Arbiter + hpdcache_fxarb #(.N(NREQUESTERS)) req_arbiter_i + ( + .clk_i, + .rst_ni, + .req_i (core_req_valid), + .gnt_o (arb_req_gnt_d), + .ready_i (arb_req_ready_i) + ); + + // Request multiplexor + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH ($bits(hpdcache_req_t)), + .ONE_HOT_SEL (1'b1) + ) core_req_mux_i ( + .data_i (core_req), + .sel_i (arb_req_gnt_d), + .data_o (arb_req_o) + ); + + // Request abort multiplexor + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH (1), + .ONE_HOT_SEL (1'b1) + ) core_req_abort_mux_i ( + .data_i (core_req_abort), + .sel_i (arb_req_gnt_q), + .data_o (arb_abort_o) + ); + + // Tag Multiplexor + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH ($bits(hpdcache_tag_t)), + .ONE_HOT_SEL (1'b1) + ) core_req_tag_mux_i ( + .data_i (core_req_tag), + .sel_i (arb_req_gnt_q), + .data_o (arb_tag_o) + ); + + // PMA Multiplexor + hpdcache_mux #( + .NINPUT (NREQUESTERS), + .DATA_WIDTH ($bits(hpdcache_pma_t)), + .ONE_HOT_SEL (1'b1) + ) core_req_pma_mux_i ( + .data_i (core_req_pma), + .sel_i (arb_req_gnt_q), + .data_o (arb_pma_o) + ); + + // Save the grant signal for the tag in the next cycle + always_ff @(posedge clk_i or negedge rst_ni) + begin : arb_req_gnt_ff + if (!rst_ni) arb_req_gnt_q <= '0; + else arb_req_gnt_q <= arb_req_gnt_d; + end + + assign arb_req_valid_o = |arb_req_gnt_d; + // }}} + + // Response demultiplexor + // {{{ + always_comb + begin : resp_demux + for (int unsigned i = 0; i < NREQUESTERS; i++) begin + core_rsp_valid_o[i] = core_rsp_valid_i && (i == 
int'(core_rsp_i.sid)); + core_rsp_o[i] = core_rsp_i; + end + end + // }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_ctrl.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_ctrl.sv new file mode 100755 index 00000000000..09fc29eabfc --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_ctrl.sv @@ -0,0 +1,760 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache controller + * History : + */ +module hpdcache_ctrl + // Package imports + // {{{ +import hpdcache_pkg::*; + // }}} + + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + + // Core request interface + input logic core_req_valid_i, + output logic core_req_ready_o, + input hpdcache_req_t core_req_i, + input logic core_req_abort_i, + input hpdcache_tag_t core_req_tag_i, + input hpdcache_pma_t core_req_pma_i, + + // Core response interface + output logic core_rsp_valid_o, + output hpdcache_rsp_t core_rsp_o, + + // Force the write buffer to send all pending writes + input logic wbuf_flush_i, + + // Global control signals + output logic cachedir_hit_o, + + // Miss handler interface + output logic miss_mshr_check_o, + output mshr_set_t miss_mshr_check_set_o, + output mshr_tag_t miss_mshr_check_tag_o, + output logic miss_mshr_alloc_o, + output logic miss_mshr_alloc_cs_o, + input logic miss_mshr_alloc_ready_i, + input logic miss_mshr_alloc_full_i, + output hpdcache_nline_t miss_mshr_alloc_nline_o, + output hpdcache_req_tid_t miss_mshr_alloc_tid_o, + output hpdcache_req_sid_t miss_mshr_alloc_sid_o, + output hpdcache_word_t miss_mshr_alloc_word_o, + output logic miss_mshr_alloc_need_rsp_o, + output logic miss_mshr_alloc_is_prefetch_o, + input logic miss_mshr_hit_i, + + // Refill interface + input logic refill_req_valid_i, + output logic refill_req_ready_o, + input logic refill_busy_i, + input logic refill_updt_plru_i, + input hpdcache_set_t refill_set_i, + input hpdcache_dir_entry_t refill_dir_entry_i, + output hpdcache_way_vector_t refill_victim_way_o, + input hpdcache_way_vector_t refill_victim_way_i, + input logic refill_write_dir_i, + input logic refill_write_data_i, + input hpdcache_word_t refill_word_i, + input hpdcache_refill_data_t refill_data_i, + input logic refill_core_rsp_valid_i, + input hpdcache_rsp_t refill_core_rsp_i, + input hpdcache_nline_t refill_nline_i, + input logic refill_updt_rtab_i, + + // Write buffer interface + input logic wbuf_empty_i, + output logic wbuf_flush_all_o, + output logic wbuf_write_o, + input logic wbuf_write_ready_i, + output wbuf_addr_t wbuf_write_addr_o, + output wbuf_data_t wbuf_write_data_o, + output wbuf_be_t wbuf_write_be_o, + output logic wbuf_write_uncacheable_o, + input logic wbuf_read_hit_i, + output logic 
wbuf_read_flush_hit_o, + output hpdcache_req_addr_t wbuf_rtab_addr_o, + output logic wbuf_rtab_is_read_o, + input logic wbuf_rtab_hit_open_i, + input logic wbuf_rtab_hit_pend_i, + input logic wbuf_rtab_hit_sent_i, + input logic wbuf_rtab_not_ready_i, + + // Uncacheable request handler + input logic uc_busy_i, + output logic uc_lrsc_snoop_o, + output hpdcache_req_addr_t uc_lrsc_snoop_addr_o, + output hpdcache_req_size_t uc_lrsc_snoop_size_o, + output logic uc_req_valid_o, + output hpdcache_uc_op_t uc_req_op_o, + output hpdcache_req_addr_t uc_req_addr_o, + output hpdcache_req_size_t uc_req_size_o, + output hpdcache_req_data_t uc_req_data_o, + output hpdcache_req_be_t uc_req_be_o, + output logic uc_req_uc_o, + output hpdcache_req_sid_t uc_req_sid_o, + output hpdcache_req_tid_t uc_req_tid_o, + output logic uc_req_need_rsp_o, + input logic uc_wbuf_flush_all_i, + input logic uc_dir_amo_match_i, + input hpdcache_set_t uc_dir_amo_match_set_i, + input hpdcache_tag_t uc_dir_amo_match_tag_i, + input logic uc_dir_amo_update_plru_i, + output hpdcache_way_vector_t uc_dir_amo_hit_way_o, + input logic uc_data_amo_write_i, + input logic uc_data_amo_write_enable_i, + input hpdcache_set_t uc_data_amo_write_set_i, + input hpdcache_req_size_t uc_data_amo_write_size_i, + input hpdcache_word_t uc_data_amo_write_word_i, + input logic [63:0] uc_data_amo_write_data_i, + input logic [7:0] uc_data_amo_write_be_i, + output logic uc_core_rsp_ready_o, + input logic uc_core_rsp_valid_i, + input hpdcache_rsp_t uc_core_rsp_i, + + // Cache Management Operation (CMO) + input logic cmo_busy_i, + output logic cmo_req_valid_o, + output hpdcache_cmoh_op_t cmo_req_op_o, + output hpdcache_req_addr_t cmo_req_addr_o, + output hpdcache_req_data_t cmo_req_wdata_o, + input logic cmo_wbuf_flush_all_i, + input logic cmo_dir_check_i, + input hpdcache_set_t cmo_dir_check_set_i, + input hpdcache_tag_t cmo_dir_check_tag_i, + output hpdcache_way_vector_t cmo_dir_check_hit_way_o, + input logic cmo_dir_inval_i, + input hpdcache_set_t cmo_dir_inval_set_i, + input hpdcache_way_vector_t cmo_dir_inval_way_i, + + output logic rtab_empty_o, + output logic ctrl_empty_o, + + // Configuration signals + input logic cfg_enable_i, + input logic cfg_rtab_single_entry_i, + + // Performance events + output logic evt_cache_write_miss_o, + output logic evt_cache_read_miss_o, + output logic evt_uncached_req_o, + output logic evt_cmo_req_o, + output logic evt_write_req_o, + output logic evt_read_req_o, + output logic evt_prefetch_req_o, + output logic evt_req_on_hold_o, + output logic evt_rtab_rollback_o, + output logic evt_stall_refill_o, + output logic evt_stall_o +); + // }}} + + // Definition of internal registers + // {{{ + logic st1_req_valid_q, st1_req_valid_d; + hpdcache_req_t st1_req_q; + logic st1_req_rtab_q; + rtab_ptr_t st1_rtab_pop_try_ptr_q; + + logic st2_req_valid_q, st2_req_valid_d; + logic st2_req_is_prefetch_q, st2_req_is_prefetch_d; + logic st2_req_need_rsp_q; + hpdcache_req_addr_t st2_req_addr_q; + hpdcache_req_sid_t st2_req_sid_q; + hpdcache_req_tid_t st2_req_tid_q; + // }}} + + // Definition of internal signals + // {{{ + logic [1:0] st0_arb_req; + logic [1:0] st0_arb_req_grant; + logic st0_arb_ready; + + logic st0_req_ready; + + logic st0_req_valid; + hpdcache_req_t st0_req; + logic st0_req_is_uncacheable; + logic st0_req_is_load; + logic st0_req_is_store; + logic st0_req_is_amo; + logic st0_req_is_cmo_fence; + logic st0_req_is_cmo_inval; + logic st0_req_is_cmo_prefetch; + logic st0_req_cachedir_read; + logic st0_req_cachedata_read; + 
hpdcache_set_t st0_req_set; + hpdcache_word_t st0_req_word; + logic st0_rtab_pop_try_valid; + logic st0_rtab_pop_try_ready; + hpdcache_req_t st0_rtab_pop_try_req; + logic st0_rtab_pop_try_sel; + rtab_ptr_t st0_rtab_pop_try_ptr; + + logic st1_rsp_valid; + logic st1_rsp_aborted; + hpdcache_req_t st1_req; + logic st1_req_abort; + logic st1_req_cachedata_write; + logic st1_req_cachedata_write_enable; + hpdcache_pma_t st1_req_pma; + hpdcache_tag_t st1_req_tag; + hpdcache_set_t st1_req_set; + hpdcache_word_t st1_req_word; + hpdcache_nline_t st1_req_nline; + hpdcache_req_addr_t st1_req_addr; + logic st1_req_updt_lru; + logic st1_req_is_uncacheable; + logic st1_req_is_load; + logic st1_req_is_store; + logic st1_req_is_amo; + logic st1_req_is_amo_lr; + logic st1_req_is_amo_sc; + logic st1_req_is_amo_swap; + logic st1_req_is_amo_add; + logic st1_req_is_amo_and; + logic st1_req_is_amo_or; + logic st1_req_is_amo_xor; + logic st1_req_is_amo_max; + logic st1_req_is_amo_maxu; + logic st1_req_is_amo_min; + logic st1_req_is_amo_minu; + logic st1_req_is_cmo_inval; + logic st1_req_is_cmo_fence; + logic st1_req_is_cmo_prefetch; + hpdcache_way_vector_t st1_dir_hit; + hpdcache_req_data_t st1_read_data; + logic st1_rtab_alloc; + logic st1_rtab_alloc_and_link; + logic st1_rtab_pop_try_commit; + logic st1_rtab_pop_try_rback; + logic st1_rtab_mshr_hit; + logic st1_rtab_mshr_full; + logic st1_rtab_mshr_ready; + logic st1_rtab_wbuf_hit; + logic st1_rtab_wbuf_not_ready; + logic st1_rtab_check; + logic st1_rtab_check_hit; + + logic st2_req_we; + hpdcache_word_t st2_req_word; + + logic rtab_full; + + logic hpdcache_init_ready; + // }}} + + // Decoding of the request + // {{{ + // Select between request in the replay table or a new core requests + assign st0_req_valid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_valid + : core_req_valid_i, + st0_req.addr_offset = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_offset + : core_req_i.addr_offset, + st0_req.addr_tag = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.addr_tag + : core_req_i.addr_tag, + st0_req.wdata = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.wdata + : core_req_i.wdata, + st0_req.op = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.op + : core_req_i.op, + st0_req.be = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.be + : core_req_i.be, + st0_req.size = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.size + : core_req_i.size, + st0_req.sid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.sid + : core_req_i.sid, + st0_req.tid = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.tid + : core_req_i.tid, + st0_req.need_rsp = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.need_rsp + : core_req_i.need_rsp, + st0_req.phys_indexed = st0_rtab_pop_try_sel ? 1'b1 + : core_req_i.phys_indexed, + st0_req.pma = st0_rtab_pop_try_sel ? st0_rtab_pop_try_req.pma + : core_req_i.pma; + + // Decode operation in stage 0 + assign st0_req_is_uncacheable = ~cfg_enable_i | ( st0_req.phys_indexed + & st0_req.pma.uncacheable), + st0_req_is_load = is_load(st0_req.op), + st0_req_is_store = is_store(st0_req.op), + st0_req_is_amo = is_amo(st0_req.op), + st0_req_is_cmo_fence = is_cmo_fence(st0_req.op, st0_req.size), + st0_req_is_cmo_inval = is_cmo_inval(st0_req.op, st0_req.size), + st0_req_is_cmo_prefetch = is_cmo_prefetch(st0_req.op, st0_req.size); + + // Decode operation in stage 1 + + // In case of replay or physically-indexed cache, the tag and PMA come + // from stage 0. Otherwise, this information come directly from the + // requester in stage 1 + assign st1_req_tag = st1_req_q.phys_indexed ? 
st1_req_q.addr_tag : core_req_tag_i,
+           st1_req_pma = st1_req_q.phys_indexed ? st1_req_q.pma      : core_req_pma_i;
+
+    assign st1_req.addr_offset  = st1_req_q.addr_offset,
+           st1_req.addr_tag     = st1_req_rtab_q ? st1_req_q.addr_tag : st1_req_tag,
+           st1_req.wdata        = st1_req_q.wdata,
+           st1_req.op           = st1_req_q.op,
+           st1_req.be           = st1_req_q.be,
+           st1_req.size         = st1_req_q.size,
+           st1_req.sid          = st1_req_q.sid,
+           st1_req.tid          = st1_req_q.tid,
+           st1_req.need_rsp     = st1_req_q.need_rsp,
+           st1_req.phys_indexed = st1_req_q.phys_indexed,
+           st1_req.pma          = st1_req_rtab_q ? st1_req_q.pma : st1_req_pma;
+
+    // A requester can ask to abort a request it initiated on the
+    // previous cycle (stage 0). Useful in case of a TLB miss, for example
+    assign st1_req_abort = core_req_abort_i & ~st1_req.phys_indexed;
+
+    assign st1_req_is_uncacheable  = ~cfg_enable_i | st1_req.pma.uncacheable,
+           st1_req_is_load         = is_load(st1_req.op),
+           st1_req_is_store        = is_store(st1_req.op),
+           st1_req_is_amo          = is_amo(st1_req.op),
+           st1_req_is_amo_lr       = is_amo_lr(st1_req.op),
+           st1_req_is_amo_sc       = is_amo_sc(st1_req.op),
+           st1_req_is_amo_swap     = is_amo_swap(st1_req.op),
+           st1_req_is_amo_add      = is_amo_add(st1_req.op),
+           st1_req_is_amo_and      = is_amo_and(st1_req.op),
+           st1_req_is_amo_or       = is_amo_or(st1_req.op),
+           st1_req_is_amo_xor      = is_amo_xor(st1_req.op),
+           st1_req_is_amo_max      = is_amo_max(st1_req.op),
+           st1_req_is_amo_maxu     = is_amo_maxu(st1_req.op),
+           st1_req_is_amo_min      = is_amo_min(st1_req.op),
+           st1_req_is_amo_minu     = is_amo_minu(st1_req.op),
+           st1_req_is_cmo_inval    = is_cmo_inval(st1_req.op, st1_req.size),
+           st1_req_is_cmo_fence    = is_cmo_fence(st1_req.op, st1_req.size),
+           st1_req_is_cmo_prefetch = is_cmo_prefetch(st1_req.op, st1_req.size);
+    // }}}
+
+    // Refill arbiter: it arbitrates between normal requests (from the core,
+    // coprocessor, prefetch) and refill requests (from the miss handler).
+    //
+    // TODO This arbiter could be replaced by a weighted-round-robin arbiter.
+    // This way we could distribute the bandwidth asymmetrically between the
+    // core and the refill interfaces.
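+    //
+    // NOTE (illustrative sketch only, not part of this change): a weighted
+    // variant of the arbiter below could take a per-requester weight, e.g.
+    // granting the core port (index 0) more slots than the refill port
+    // (index 1). The module name hpdcache_wrrarb and its WEIGHT parameter
+    // are hypothetical and do not exist in this repository; they only
+    // illustrate the idea:
+    //
+    //     hpdcache_wrrarb #(.N(2), .WEIGHT('{4, 1})) st0_arb_i (
+    //         .clk_i,
+    //         .rst_ni,
+    //         .req_i   (st0_arb_req),
+    //         .gnt_o   (st0_arb_req_grant),
+    //         .ready_i (st0_arb_ready)
+    //     );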
+ // {{{ + hpdcache_rrarb #(.N(2)) st0_arb_i + ( + .clk_i, + .rst_ni, + .req_i (st0_arb_req), + .gnt_o (st0_arb_req_grant), + .ready_i (st0_arb_ready) + ); + + // The arbiter can cycle the priority token when: + // - The granted request is consumed (req_grant & req_valid & req_ready) + // - The granted request is aborted (req_grant & ~req_valid) + assign st0_arb_ready = ((st0_arb_req_grant[0] & st0_req_valid & st0_req_ready ) | + (st0_arb_req_grant[1] & refill_req_valid_i & refill_req_ready_o) | + (st0_arb_req_grant[0] & ~st0_req_valid ) | + (st0_arb_req_grant[1] & ~refill_req_valid_i)); + + assign st0_arb_req[0] = st0_req_valid, + st0_arb_req[1] = refill_req_valid_i; + + assign core_req_ready_o = st0_req_ready & ~st0_rtab_pop_try_sel, + st0_rtab_pop_try_ready = st0_req_ready & st0_rtab_pop_try_sel; + + // Trigger an event signal when the pipeline is stalled (new request is not consumed) + assign evt_stall_o = core_req_valid_i & ~core_req_ready_o; + // }}} + + // Cache controller protocol engine + // {{{ + hpdcache_ctrl_pe hpdcache_ctrl_pe_i( + .arb_st0_req_valid_i (st0_req_valid & st0_arb_req_grant[0]), + .arb_st0_req_ready_o (st0_req_ready), + .arb_refill_valid_i (refill_req_valid_i & st0_arb_req_grant[1]), + .arb_refill_ready_o (refill_req_ready_o), + .st0_req_is_uncacheable_i (st0_req_is_uncacheable), + .st0_req_need_rsp_i (st0_req.need_rsp), + .st0_req_is_load_i (st0_req_is_load), + .st0_req_is_store_i (st0_req_is_store), + .st0_req_is_amo_i (st0_req_is_amo), + .st0_req_is_cmo_fence_i (st0_req_is_cmo_fence), + .st0_req_is_cmo_inval_i (st0_req_is_cmo_inval), + .st0_req_is_cmo_prefetch_i (st0_req_is_cmo_prefetch), + .st0_req_mshr_check_o (miss_mshr_check_o), + .st0_req_cachedir_read_o (st0_req_cachedir_read), + .st0_req_cachedata_read_o (st0_req_cachedata_read), + + .st1_req_valid_i (st1_req_valid_q), + .st1_req_abort_i (st1_req_abort), + .st1_req_rtab_i (st1_req_rtab_q), + .st1_req_is_uncacheable_i (st1_req_is_uncacheable), + .st1_req_need_rsp_i (st1_req.need_rsp), + .st1_req_is_load_i (st1_req_is_load), + .st1_req_is_store_i (st1_req_is_store), + .st1_req_is_amo_i (st1_req_is_amo), + .st1_req_is_cmo_inval_i (st1_req_is_cmo_inval), + .st1_req_is_cmo_fence_i (st1_req_is_cmo_fence), + .st1_req_is_cmo_prefetch_i (st1_req_is_cmo_prefetch), + .st1_req_valid_o (st1_req_valid_d), + .st1_rsp_valid_o (st1_rsp_valid), + .st1_rsp_aborted_o (st1_rsp_aborted), + .st1_req_cachedir_updt_lru_o (st1_req_updt_lru), + .st1_req_cachedata_write_o (st1_req_cachedata_write), + .st1_req_cachedata_write_enable_o (st1_req_cachedata_write_enable), + + .st2_req_valid_i (st2_req_valid_q), + .st2_req_is_prefetch_i (st2_req_is_prefetch_q), + .st2_req_valid_o (st2_req_valid_d), + .st2_req_we_o (st2_req_we), + .st2_req_is_prefetch_o (st2_req_is_prefetch_d), + .st2_req_mshr_alloc_o (miss_mshr_alloc_o), + .st2_req_mshr_alloc_cs_o (miss_mshr_alloc_cs_o), + + .rtab_full_i (rtab_full), + .rtab_req_valid_i (st0_rtab_pop_try_valid), + .rtab_sel_o (st0_rtab_pop_try_sel), + .rtab_check_o (st1_rtab_check), + .rtab_check_hit_i (st1_rtab_check_hit), + .st1_rtab_alloc_o (st1_rtab_alloc), + .st1_rtab_alloc_and_link_o (st1_rtab_alloc_and_link), + .st1_rtab_commit_o (st1_rtab_pop_try_commit), + .st1_rtab_rback_o (st1_rtab_pop_try_rback), + .st1_rtab_mshr_hit_o (st1_rtab_mshr_hit), + .st1_rtab_mshr_full_o (st1_rtab_mshr_full), + .st1_rtab_mshr_ready_o (st1_rtab_mshr_ready), + .st1_rtab_wbuf_hit_o (st1_rtab_wbuf_hit), + .st1_rtab_wbuf_not_ready_o (st1_rtab_wbuf_not_ready), + + .cachedir_hit_i (cachedir_hit_o), + .cachedir_init_ready_i 
(hpdcache_init_ready), + + .mshr_alloc_ready_i (miss_mshr_alloc_ready_i), + .mshr_hit_i (miss_mshr_hit_i), + .mshr_full_i (miss_mshr_alloc_full_i), + + .refill_busy_i, + .refill_core_rsp_valid_i, + + .wbuf_write_valid_o (wbuf_write_o), + .wbuf_write_ready_i, + .wbuf_read_hit_i, + .wbuf_write_uncacheable_o, + .wbuf_read_flush_hit_o, + + .uc_busy_i, + .uc_req_valid_o, + .uc_core_rsp_ready_o, + + .cmo_busy_i, + .cmo_req_valid_o, + + .evt_cache_write_miss_o, + .evt_cache_read_miss_o, + .evt_uncached_req_o, + .evt_cmo_req_o, + .evt_write_req_o, + .evt_read_req_o, + .evt_prefetch_req_o, + .evt_req_on_hold_o, + .evt_rtab_rollback_o, + .evt_stall_refill_o + ); + + assign ctrl_empty_o = ~(st1_req_valid_q | st2_req_valid_q); + // }}} + + // Replay table + // {{{ + hpdcache_rtab #( + .rtab_entry_t (hpdcache_req_t) + ) hpdcache_rtab_i( + .clk_i, + .rst_ni, + + .empty_o (rtab_empty_o), + .full_o (rtab_full), + + .check_i (st1_rtab_check), + .check_nline_i (st1_req_nline), + .check_hit_o (st1_rtab_check_hit), + + .alloc_i (st1_rtab_alloc), + .alloc_and_link_i (st1_rtab_alloc_and_link), + .alloc_req_i (st1_req), + .alloc_mshr_hit_i (st1_rtab_mshr_hit), + .alloc_mshr_full_i (st1_rtab_mshr_full), + .alloc_mshr_ready_i (st1_rtab_mshr_ready), + .alloc_wbuf_hit_i (st1_rtab_wbuf_hit), + .alloc_wbuf_not_ready_i (st1_rtab_wbuf_not_ready), + + .pop_try_valid_o (st0_rtab_pop_try_valid), + .pop_try_i (st0_rtab_pop_try_ready), + .pop_try_req_o (st0_rtab_pop_try_req), + .pop_try_ptr_o (st0_rtab_pop_try_ptr), + + .pop_commit_i (st1_rtab_pop_try_commit), + .pop_commit_ptr_i (st1_rtab_pop_try_ptr_q), + + .pop_rback_i (st1_rtab_pop_try_rback), + .pop_rback_ptr_i (st1_rtab_pop_try_ptr_q), + .pop_rback_mshr_hit_i (st1_rtab_mshr_hit), + .pop_rback_mshr_full_i (st1_rtab_mshr_full), + .pop_rback_mshr_ready_i (st1_rtab_mshr_ready), + .pop_rback_wbuf_hit_i (st1_rtab_wbuf_hit), + .pop_rback_wbuf_not_ready_i (st1_rtab_wbuf_not_ready), + + .wbuf_addr_o (wbuf_rtab_addr_o), + .wbuf_is_read_o (wbuf_rtab_is_read_o), + .wbuf_hit_open_i (wbuf_rtab_hit_open_i), + .wbuf_hit_pend_i (wbuf_rtab_hit_pend_i), + .wbuf_hit_sent_i (wbuf_rtab_hit_sent_i), + .wbuf_not_ready_i (wbuf_rtab_not_ready_i), + + .miss_ready_i (miss_mshr_alloc_ready_i), + + .refill_i (refill_updt_rtab_i), + .refill_nline_i, + + .cfg_single_entry_i (cfg_rtab_single_entry_i) + ); + // }}} + + // Pipeline stage 1 registers + // {{{ + always_ff @(posedge clk_i) + begin : st1_req_payload_ff + if (st0_req_ready) begin + st1_req_q <= st0_req; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : st1_req_valid_ff + if (!rst_ni) begin + st1_req_valid_q <= 1'b0; + st1_req_rtab_q <= 1'b0; + st1_rtab_pop_try_ptr_q <= '0; + end else begin + st1_req_valid_q <= st1_req_valid_d; + if (st0_req_ready) begin + st1_req_rtab_q <= st0_rtab_pop_try_sel; + if (st0_rtab_pop_try_sel) begin + st1_rtab_pop_try_ptr_q <= st0_rtab_pop_try_ptr; + end + end + end + end + // }}} + + // Pipeline stage 2 registers + // {{{ + always_ff @(posedge clk_i) + begin : st2_req_payload_ff + if (st2_req_we) begin + st2_req_need_rsp_q <= st1_req.need_rsp; + st2_req_addr_q <= st1_req_addr; + st2_req_sid_q <= st1_req.sid; + st2_req_tid_q <= st1_req.tid; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : st2_req_valid_ff + if (!rst_ni) begin + st2_req_valid_q <= 1'b0; + st2_req_is_prefetch_q <= 1'b0; + end else begin + st2_req_valid_q <= st2_req_valid_d; + st2_req_is_prefetch_q <= st2_req_is_prefetch_d; + end + end + // }}} + + // Controller for the HPDcache directory and data memory 
arrays + // {{{ + assign st0_req_set = hpdcache_get_req_offset_set(st0_req.addr_offset), + st0_req_word = hpdcache_get_req_offset_word(st0_req.addr_offset), + st1_req_set = hpdcache_get_req_offset_set(st1_req.addr_offset), + st1_req_word = hpdcache_get_req_offset_word(st1_req.addr_offset), + st1_req_addr = {st1_req.addr_tag, st1_req.addr_offset}, + st1_req_nline = hpdcache_get_req_addr_nline(st1_req_addr), + st2_req_word = hpdcache_get_req_addr_word(st2_req_addr_q); + + hpdcache_memctrl hpdcache_memctrl_i ( + .clk_i, + .rst_ni, + + .ready_o (hpdcache_init_ready), + + .dir_match_i (st0_req_cachedir_read), + .dir_match_set_i (st0_req_set), + .dir_match_tag_i (st1_req.addr_tag), + .dir_update_lru_i (st1_req_updt_lru), + .dir_hit_way_o (st1_dir_hit), + + .dir_amo_match_i (uc_dir_amo_match_i), + .dir_amo_match_set_i (uc_dir_amo_match_set_i), + .dir_amo_match_tag_i (uc_dir_amo_match_tag_i), + .dir_amo_update_plru_i (uc_dir_amo_update_plru_i), + .dir_amo_hit_way_o (uc_dir_amo_hit_way_o), + + .dir_refill_i (refill_write_dir_i), + .dir_refill_set_i (refill_set_i), + .dir_refill_entry_i (refill_dir_entry_i), + .dir_refill_updt_plru_i (refill_updt_plru_i), + .dir_victim_way_o (refill_victim_way_o), + + .dir_cmo_check_i (cmo_dir_check_i), + .dir_cmo_check_set_i (cmo_dir_check_set_i), + .dir_cmo_check_tag_i (cmo_dir_check_tag_i), + .dir_cmo_check_hit_way_o (cmo_dir_check_hit_way_o), + + .dir_cmo_inval_i (cmo_dir_inval_i), + .dir_cmo_inval_set_i (cmo_dir_inval_set_i), + .dir_cmo_inval_way_i (cmo_dir_inval_way_i), + + .data_req_read_i (st0_req_cachedata_read), + .data_req_read_set_i (st0_req_set), + .data_req_read_size_i (st0_req.size), + .data_req_read_word_i (st0_req_word), + .data_req_read_data_o (st1_read_data), + + .data_req_write_i (st1_req_cachedata_write), + .data_req_write_enable_i (st1_req_cachedata_write_enable), + .data_req_write_set_i (st1_req_set), + .data_req_write_size_i (st1_req.size), + .data_req_write_word_i (st1_req_word), + .data_req_write_data_i (st1_req.wdata), + .data_req_write_be_i (st1_req.be), + + .data_amo_write_i (uc_data_amo_write_i), + .data_amo_write_enable_i (uc_data_amo_write_enable_i), + .data_amo_write_set_i (uc_data_amo_write_set_i), + .data_amo_write_size_i (uc_data_amo_write_size_i), + .data_amo_write_word_i (uc_data_amo_write_word_i), + .data_amo_write_data_i (uc_data_amo_write_data_i), + .data_amo_write_be_i (uc_data_amo_write_be_i), + + .data_refill_i (refill_write_data_i), + .data_refill_way_i (refill_victim_way_i), + .data_refill_set_i (refill_set_i), + .data_refill_word_i (refill_word_i), + .data_refill_data_i (refill_data_i) + ); + + assign cachedir_hit_o = |st1_dir_hit; + // }}} + + // Write buffer outputs + // {{{ + assign wbuf_write_addr_o = st1_req_addr, + wbuf_write_data_o = st1_req.wdata, + wbuf_write_be_o = st1_req.be, + wbuf_flush_all_o = cmo_wbuf_flush_all_i | uc_wbuf_flush_all_i | wbuf_flush_i; + // }}} + + // Miss handler outputs + // {{{ + assign miss_mshr_check_set_o = + st0_req.addr_offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_MSHR_SET_WIDTH]; + assign miss_mshr_check_tag_o = + st1_req_nline[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH]; + + assign miss_mshr_alloc_nline_o = hpdcache_get_req_addr_nline(st2_req_addr_q), + miss_mshr_alloc_tid_o = st2_req_tid_q, + miss_mshr_alloc_sid_o = st2_req_sid_q, + miss_mshr_alloc_word_o = st2_req_word, + miss_mshr_alloc_need_rsp_o = st2_req_need_rsp_q, + miss_mshr_alloc_is_prefetch_o = st2_req_is_prefetch_q; + // }}} + + // Uncacheable request handler outputs + // {{{ + assign uc_lrsc_snoop_o = 
st1_req_valid_q & st1_req_is_store, + uc_lrsc_snoop_addr_o = st1_req_addr, + uc_lrsc_snoop_size_o = st1_req.size, + uc_req_addr_o = st1_req_addr, + uc_req_size_o = st1_req.size, + uc_req_data_o = st1_req.wdata, + uc_req_be_o = st1_req.be, + uc_req_uc_o = st1_req_is_uncacheable, + uc_req_sid_o = st1_req.sid, + uc_req_tid_o = st1_req.tid, + uc_req_need_rsp_o = st1_req.need_rsp, + uc_req_op_o.is_ld = st1_req_is_load, + uc_req_op_o.is_st = st1_req_is_store, + uc_req_op_o.is_amo_lr = st1_req_is_amo_lr, + uc_req_op_o.is_amo_sc = st1_req_is_amo_sc, + uc_req_op_o.is_amo_swap = st1_req_is_amo_swap, + uc_req_op_o.is_amo_add = st1_req_is_amo_add, + uc_req_op_o.is_amo_and = st1_req_is_amo_and, + uc_req_op_o.is_amo_or = st1_req_is_amo_or, + uc_req_op_o.is_amo_xor = st1_req_is_amo_xor, + uc_req_op_o.is_amo_max = st1_req_is_amo_max, + uc_req_op_o.is_amo_maxu = st1_req_is_amo_maxu, + uc_req_op_o.is_amo_min = st1_req_is_amo_min, + uc_req_op_o.is_amo_minu = st1_req_is_amo_minu; + // }}} + + // CMO request handler outputs + // {{{ + assign cmo_req_addr_o = st1_req_addr, + cmo_req_wdata_o = st1_req.wdata, + cmo_req_op_o.is_fence = st1_req_is_cmo_fence, + cmo_req_op_o.is_inval_by_nline = st1_req_is_cmo_inval & + is_cmo_inval_by_nline(st1_req.size), + cmo_req_op_o.is_inval_by_set = st1_req_is_cmo_inval & + is_cmo_inval_by_set(st1_req.size), + cmo_req_op_o.is_inval_all = st1_req_is_cmo_inval & + is_cmo_inval_all(st1_req.size); + // }}} + + // Control of the response to the core + // {{{ + assign core_rsp_valid_o = refill_core_rsp_valid_i | + (uc_core_rsp_valid_i & uc_core_rsp_ready_o) | + st1_rsp_valid, + core_rsp_o.rdata = (refill_core_rsp_valid_i ? refill_core_rsp_i.rdata : + (uc_core_rsp_valid_i ? uc_core_rsp_i.rdata : + st1_read_data)), + core_rsp_o.sid = (refill_core_rsp_valid_i ? refill_core_rsp_i.sid : + (uc_core_rsp_valid_i ? uc_core_rsp_i.sid : + st1_req.sid)), + core_rsp_o.tid = (refill_core_rsp_valid_i ? refill_core_rsp_i.tid : + (uc_core_rsp_valid_i ? uc_core_rsp_i.tid : + st1_req.tid)), + core_rsp_o.error = (refill_core_rsp_valid_i ? refill_core_rsp_i.error : + (uc_core_rsp_valid_i ? uc_core_rsp_i.error : + /* FIXME */1'b0)), + core_rsp_o.aborted = st1_rsp_aborted; + // }}} + + // Assertions + // pragma translate_off + // {{{ + assert property (@(posedge clk_i) + $onehot0({core_req_ready_o, st0_rtab_pop_try_ready, refill_req_ready_o})) else + $error("ctrl: only one request can be served per cycle"); + // }}} + // pragma translate_on +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_ctrl_pe.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_ctrl_pe.sv new file mode 100755 index 00000000000..13b4f5883cd --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_ctrl_pe.sv @@ -0,0 +1,620 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Control Protocol Engine + * History : + */ +module hpdcache_ctrl_pe + // Ports + // {{{ +( + // Refill arbiter + // {{{ + input logic arb_st0_req_valid_i, + output logic arb_st0_req_ready_o, + input logic arb_refill_valid_i, + output logic arb_refill_ready_o, + // }}} + + // Pipeline stage 0 + // {{{ + input logic st0_req_is_uncacheable_i, + input logic st0_req_need_rsp_i, + input logic st0_req_is_load_i, + input logic st0_req_is_store_i, + input logic st0_req_is_amo_i, + input logic st0_req_is_cmo_fence_i, + input logic st0_req_is_cmo_inval_i, + input logic st0_req_is_cmo_prefetch_i, + output logic st0_req_mshr_check_o, + output logic st0_req_cachedir_read_o, + output logic st0_req_cachedata_read_o, + // }}} + + // Pipeline stage 1 + // {{{ + input logic st1_req_valid_i, + input logic st1_req_abort_i, + input logic st1_req_rtab_i, + input logic st1_req_is_uncacheable_i, + input logic st1_req_need_rsp_i, + input logic st1_req_is_load_i, + input logic st1_req_is_store_i, + input logic st1_req_is_amo_i, + input logic st1_req_is_cmo_inval_i, + input logic st1_req_is_cmo_fence_i, + input logic st1_req_is_cmo_prefetch_i, + output logic st1_req_valid_o, + output logic st1_rsp_valid_o, + output logic st1_rsp_aborted_o, + output logic st1_req_cachedir_updt_lru_o, + output logic st1_req_cachedata_write_o, + output logic st1_req_cachedata_write_enable_o, + // }}} + + // Pipeline stage 2 + // {{{ + input logic st2_req_valid_i, + input logic st2_req_is_prefetch_i, + output logic st2_req_valid_o, + output logic st2_req_we_o, + output logic st2_req_is_prefetch_o, + output logic st2_req_mshr_alloc_o, + output logic st2_req_mshr_alloc_cs_o, + // }}} + + // Replay + // {{{ + input logic rtab_full_i, + input logic rtab_req_valid_i, + output logic rtab_sel_o, + output logic rtab_check_o, + input logic rtab_check_hit_i, + output logic st1_rtab_alloc_o, + output logic st1_rtab_alloc_and_link_o, + output logic st1_rtab_commit_o, + output logic st1_rtab_rback_o, + output logic st1_rtab_mshr_hit_o, + output logic st1_rtab_mshr_full_o, + output logic st1_rtab_mshr_ready_o, + output logic st1_rtab_wbuf_hit_o, + output logic st1_rtab_wbuf_not_ready_o, + // }}} + + // Cache directory + // {{{ + input logic cachedir_hit_i, + input logic cachedir_init_ready_i, + // }}} + + // Miss Status Holding Register (MSHR) + // {{{ + input logic mshr_alloc_ready_i, + input logic mshr_hit_i, + input logic mshr_full_i, + // }}} + + // Refill interface + // {{{ + input logic refill_busy_i, + input logic refill_core_rsp_valid_i, + // }}} + + // Write buffer + // {{{ + input logic wbuf_write_ready_i, + input logic wbuf_read_hit_i, + output logic wbuf_write_valid_o, + output logic wbuf_write_uncacheable_o, + output logic wbuf_read_flush_hit_o, + // }}} + + // Uncacheable request handler + // {{{ + input logic uc_busy_i, + output logic uc_req_valid_o, + output logic uc_core_rsp_ready_o, + // }}} + + // Cache Management Operation (CMO) + // {{{ + input logic cmo_busy_i, + output logic cmo_req_valid_o, + // }}} + + // Performance events + // {{{ + output logic evt_cache_write_miss_o, + output logic evt_cache_read_miss_o, + output logic evt_uncached_req_o, + output logic evt_cmo_req_o, + output logic evt_write_req_o, + output logic evt_read_req_o, + output logic evt_prefetch_req_o, + output logic evt_req_on_hold_o, + output logic evt_rtab_rollback_o, + output logic evt_stall_refill_o + // }}} +); + // }}} + + // Definition of internal signals + // 
{{{ + logic st1_fence; + logic st1_rtab_alloc, st1_rtab_alloc_and_link; + // }}} + + // Global control signals + // {{{ + + // Determine if the new request is a "fence". Here, fence instructions are + // considered those that need to be executed in program order + // (irrespectively of addresses). This means that all memory operations + // arrived before the "fence" instruction need to be finished, and only + // then the "fence" instruction is executed. In the same manner, all + // instructions following the "fence" need to wait the completion of this + // last before being executed. + assign st1_fence = st1_req_is_uncacheable_i | + st1_req_is_cmo_fence_i | + st1_req_is_cmo_inval_i | + st1_req_is_amo_i; + // }}} + + // Arbitration of responses to the core + // {{{ + assign uc_core_rsp_ready_o = ~refill_core_rsp_valid_i; + // }}} + + // Arbiter between core or replay request. + // {{{ + // Take the replay request when: + // - The replay table is full. + // - The replay table has a ready request (request with all dependencies solved) + // - There is an outstanding CMO or uncached/AMO request + // + // IMPORTANT: When the replay table is full, the cache cannot accept new core + // requests because this can introduce a dead-lock : If the core request needs to + // be put on hold, as there is no place the replay table, the pipeline needs to + // stall. If the pipeline is stalled, dependencies of on-hold requests cannot be + // solved, and the system is locked. + assign rtab_sel_o = rtab_full_i | + rtab_req_valid_i | + (st1_req_valid_i & st1_fence) | + cmo_busy_i | + uc_busy_i; + // }}} + + // Replay logic + // {{{ + // Replay table allocation + assign st1_rtab_alloc_o = st1_rtab_alloc & ~st1_req_rtab_i, + st1_rtab_alloc_and_link_o = st1_rtab_alloc_and_link, + st1_rtab_rback_o = st1_rtab_alloc & st1_req_rtab_i; + + // Performance event + assign evt_req_on_hold_o = st1_rtab_alloc | st1_rtab_alloc_and_link, + evt_rtab_rollback_o = st1_rtab_rback_o; + // }}} + + // Data-cache control lines + // {{{ + always_comb + begin : hpdcache_ctrl_comb + automatic logic nop, st1_nop, st2_nop; + + uc_req_valid_o = 1'b0; + + cmo_req_valid_o = 1'b0; + + wbuf_write_valid_o = 1'b0; + wbuf_read_flush_hit_o = 1'b0; + wbuf_write_uncacheable_o = 1'b0; // unused + + arb_st0_req_ready_o = 1'b0; + arb_refill_ready_o = 1'b0; + + st0_req_mshr_check_o = 1'b0; + st0_req_cachedir_read_o = 1'b0; + st0_req_cachedata_read_o = 1'b0; + + st1_req_valid_o = st1_req_valid_i; + st1_nop = 1'b0; + st1_req_cachedata_write_o = 1'b0; + st1_req_cachedata_write_enable_o = 1'b0; + st1_req_cachedir_updt_lru_o = 1'b0; + st1_rsp_valid_o = 1'b0; + st1_rsp_aborted_o = 1'b0; + + st2_req_valid_o = st2_req_valid_i; + st2_req_we_o = 1'b0; + st2_req_is_prefetch_o = 1'b0; + st2_req_mshr_alloc_cs_o = 1'b0; + st2_req_mshr_alloc_o = 1'b0; + st2_nop = 1'b0; + + nop = 1'b0; + + rtab_check_o = 1'b0; + st1_rtab_alloc = 1'b0; + st1_rtab_alloc_and_link = 1'b0; + st1_rtab_commit_o = 1'b0; + st1_rtab_mshr_hit_o = 1'b0; + st1_rtab_mshr_full_o = 1'b0; + st1_rtab_mshr_ready_o = 1'b0; + st1_rtab_wbuf_hit_o = 1'b0; + st1_rtab_wbuf_not_ready_o = 1'b0; + + evt_cache_write_miss_o = 1'b0; + evt_cache_read_miss_o = 1'b0; + evt_uncached_req_o = 1'b0; + evt_cmo_req_o = 1'b0; + evt_write_req_o = 1'b0; + evt_read_req_o = 1'b0; + evt_prefetch_req_o = 1'b0; + evt_stall_refill_o = 1'b0; + + // Wait for the cache to be initialized + // {{{ + if (!cachedir_init_ready_i) begin + // initialization of the cache RAMs + end + // }}} + + // Refilling the cache + // {{{ + else if 
(refill_busy_i) begin + // miss handler has the control of the cache + evt_stall_refill_o = arb_st0_req_valid_i; + end + // }}} + + // Normal pipeline operation + // {{{ + else begin + // Stage 2 request pending + // {{{ + if (st2_req_valid_i) begin + st2_req_valid_o = 1'b0; + + // Allocate an entry in the MSHR + st2_req_mshr_alloc_cs_o = 1'b1; + st2_req_mshr_alloc_o = 1'b1; + + // Introduce a NOP in the next cycle to prevent a hazard on the MSHR + st2_nop = 1'b1; + + // Performance event + evt_cache_read_miss_o = ~st2_req_is_prefetch_i; + evt_read_req_o = ~st2_req_is_prefetch_i; + evt_prefetch_req_o = st2_req_is_prefetch_i; + end + // }}} + + // Stage 1 request pending + // {{{ + if (st1_req_valid_i) begin + // Check if the request in stage 1 has a conflict with one of the + // request in the replay table. + rtab_check_o = ~st1_req_rtab_i & ~st1_fence; + + // Check if the current request is aborted. If so, respond to the + // core (when need_rsp is set) and set the aborted flag + if (st1_req_abort_i && !st1_req_rtab_i) begin + st1_rsp_valid_o = st1_req_need_rsp_i; + st1_rsp_aborted_o = 1'b1; + end + + // Allocate a new entry in the replay table in case of conflict with + // an on-hold request + else if (rtab_check_o && rtab_check_hit_i) begin + st1_rtab_alloc_and_link = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + // CMO fence or invalidate + // {{{ + else if (st1_req_is_cmo_fence_i || st1_req_is_cmo_inval_i) begin + cmo_req_valid_o = 1'b1; + st1_nop = 1'b1; + + // Performance event + evt_cmo_req_o = 1'b1; + end + // }}} + + // Uncacheable load, store or AMO request + // {{{ + else if (st1_req_is_uncacheable_i) begin + uc_req_valid_o = 1'b1; + st1_nop = 1'b1; + + // Performance event + evt_uncached_req_o = 1'b1; + end + // }}} + + // Cacheable request + // {{{ + else begin + // AMO cacheable request + // {{{ + if (st1_req_is_amo_i) begin + uc_req_valid_o = 1'b1; + st1_nop = 1'b1; + + // Performance event + evt_uncached_req_o = 1'b1; + end + // }}} + + // Load cacheable request + // {{{ + if (|{st1_req_is_load_i, + st1_req_is_cmo_prefetch_i}) + begin + // Cache miss + // {{{ + if (!cachedir_hit_i) begin + // If there is a match in the write buffer, lets send the + // entry right away + wbuf_read_flush_hit_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + + // Pending miss on the same line + if (mshr_hit_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_hit_o = 1'b1; + end + + // No available slot in the MSHR + else if (mshr_full_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_full_o = 1'b1; + end + + // Hit on an open entry of the write buffer: + // wait for the entry to be acknowledged + else if (wbuf_read_hit_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_wbuf_hit_o = 1'b1; + end + + // Miss Handler is not ready to send + else if (!mshr_alloc_ready_i) begin + // Put the request on hold if the MISS HANDLER is not + // ready to send a new miss request. This is to prevent + // a deadlock between the read request channel and the + // read response channel. + // + // The request channel may be stalled by targets if they + // are not able to send a response (response is + // prioritary). Therefore, we need to put the request on + // hold to allow a possible refill read response to be + // accomplished. 
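                            //
                            // Illustrative scenario (assuming an AXI-like memory
                            // interface): the target may stall the read-request
                            // channel until it can deliver a read response; if
                            // the pipeline also stalled here waiting for the
                            // miss handler, the pending refill response could
                            // never drain and both sides would wait on each
                            // other forever. Parking the request in the replay
                            // table breaks that cycle.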
+ st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_ready_o = 1'b1; + end + + // Forward the request to the next stage to allocate the + // entry in the MSHR and send the refill request + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + st2_req_valid_o = 1'b1; + st2_req_we_o = 1'b1; + st2_req_is_prefetch_o = st1_req_is_cmo_prefetch_i; + end + end + // }}} + + // Cache hit + // {{{ + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + // Add a NOP when replaying a request, and there is no available + // request from the replay table. + st1_nop = st1_req_rtab_i & ~rtab_sel_o; + + // Update the PLRU bit for the accessed set + st1_req_cachedir_updt_lru_o = st1_req_is_load_i; + + // Respond to the core (if needed) + st1_rsp_valid_o = st1_req_need_rsp_i; + + // Performance event + evt_read_req_o = ~st1_req_is_cmo_prefetch_i; + evt_prefetch_req_o = st1_req_is_cmo_prefetch_i; + end + // }}} + end + // }}} + + // Store cacheable request + // {{{ + if (st1_req_is_store_i) begin + // Write in the write buffer if there is no pending miss in the same line. + // + // We assume here that the NoC that transports read and write transactions does + // not guaranty the order between transactions on those channels. + // Therefore, the cache must hold a write if there is a pending read on the + // same address. + wbuf_write_valid_o = ~mshr_hit_i; + + // Add a NOP in the pipeline when: + // - Structural hazard on the cache data if the st0 request is a load + // operation. + // - Replaying a request, the cache cannot accept a request from the + // core the next cycle. It can however accept a new request from the + // replay table + // + // IMPORTANT: we could remove the NOP in the first scenario if the + // controller checks for the hit of this write. However, this adds + // a DIR_RAM -> DATA_RAM timing path. + st1_nop = (arb_st0_req_valid_i & st0_req_is_load_i) | + (st1_req_rtab_i & ~rtab_sel_o); + + // Enable the data RAM in case of write. However, the actual write + // depends on the hit signal from the cache directory. + // + // IMPORTANT: this produces unnecessary power consumption in case of + // write misses, but removes timing paths between the cache directory + // RAM and the data RAM chip-select. 
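                // Note: only the data RAM chip-select is asserted
                // speculatively here; the qualifying write enable
                // (st1_req_cachedata_write_enable_o) is asserted further
                // below, in the cache-hit branch, once the directory hit is
                // known.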
+ st1_req_cachedata_write_o = 1'b1; + + // Cache miss + if (!cachedir_hit_i) begin + // Pending miss on the same line + if (mshr_hit_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_mshr_hit_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + // No available entry in the write buffer (or conflict on pending entry) + else if (!wbuf_write_ready_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_wbuf_not_ready_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + // Respond to the core (if needed) + st1_rsp_valid_o = st1_req_need_rsp_i; + + // Performance event + evt_cache_write_miss_o = 1'b1; + evt_write_req_o = 1'b1; + end + end + + // Cache hit + else begin + // No available entry in the write buffer (or conflict on pending entry) + if (!wbuf_write_ready_i) begin + // Put the request in the replay table + st1_rtab_alloc = 1'b1; + + st1_rtab_wbuf_not_ready_o = 1'b1; + + // Do not consume a request in this cycle in stage 0 + st1_nop = 1'b1; + end + + // The store can be performed in the write buffer and in the cache + else begin + // If the request comes from the replay table, free the + // corresponding RTAB entry + st1_rtab_commit_o = st1_req_rtab_i; + + // Respond to the core + st1_rsp_valid_o = st1_req_need_rsp_i; + + // Update the PLRU bit for the accessed set + st1_req_cachedir_updt_lru_o = 1'b1; + + // Write in the data RAM + st1_req_cachedata_write_enable_o = 1'b1; + + // Performance event + evt_write_req_o = 1'b1; + end + end + end + // }}} + end + // }}} + end + // }}} + + // New request + // {{{ + nop = st1_nop | st2_nop; + + // The cache controller accepts a core request when: + // - The req-refill arbiter grants the request + // - The pipeline is not being flushed + arb_st0_req_ready_o = arb_st0_req_valid_i & ~nop; + + // The cache controller accepts a refill when: + // - The req-refill arbiter grants the refill + // - The pipeline is empty + arb_refill_ready_o = arb_refill_valid_i & ~(st1_req_valid_i | st2_req_valid_i); + + // Forward the request to stage 1 + // - There is a valid request in stage 0 + st1_req_valid_o = arb_st0_req_ready_o; + + // New cacheable stage 0 request granted + // {{{ + // IMPORTANT: here the RAM is enabled independently if the + // request needs to be put on-hold. + // This increases the power consumption in that cases, but + // removes the timing paths RAM-to-RAM between the cache + // directory and the data array. 
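            // For a granted, cacheable stage 0 request: read the data RAM for
            // loads (unless stage 1 holds a cacheable store that needs the
            // data RAM in this cycle), and look up the MSHR and the cache
            // directory for loads, prefetches, stores and AMOs. The directory
            // read is skipped for AMOs, which are forwarded to the
            // uncacheable/AMO handler.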
+ if (arb_st0_req_valid_i && !st0_req_is_uncacheable_i) begin + st0_req_cachedata_read_o = + st0_req_is_load_i & + ~(st1_req_valid_i & st1_req_is_store_i & ~st1_req_is_uncacheable_i); + if (st0_req_is_load_i | + st0_req_is_cmo_prefetch_i | + st0_req_is_store_i | + st0_req_is_amo_i ) + begin + st0_req_mshr_check_o = 1'b1; + st0_req_cachedir_read_o = ~st0_req_is_amo_i; + end + end + // }}} + // }}} + end + // }}} end of normal pipeline operation + end + // }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_memarray.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_memarray.sv new file mode 100644 index 00000000000..7c7ee6597ce --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_memarray.sv @@ -0,0 +1,120 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Directory and Data Memory Arrays + * History : + */ +module hpdcache_memarray +import hpdcache_pkg::*; + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + + input hpdcache_dir_addr_t dir_addr_i, + input hpdcache_way_vector_t dir_cs_i, + input hpdcache_way_vector_t dir_we_i, + input hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_wentry_i, + output hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_rentry_o, + + input hpdcache_data_addr_t data_addr_i, + input hpdcache_data_enable_t data_cs_i, + input hpdcache_data_enable_t data_we_i, + input hpdcache_data_be_entry_t data_wbyteenable_i, + input hpdcache_data_entry_t data_wentry_i, + output hpdcache_data_entry_t data_rentry_o +); + // }}} + + // Memory arrays + // {{{ + generate + genvar x, y, dir_w; + + // Directory + // + for (dir_w = 0; dir_w < int'(HPDCACHE_WAYS); dir_w++) begin : dir_sram_gen + hpdcache_sram #( + .DATA_SIZE (HPDCACHE_DIR_RAM_WIDTH), + .ADDR_SIZE (HPDCACHE_DIR_RAM_ADDR_WIDTH) + ) dir_sram ( + .clk (clk_i), + .rst_n (rst_ni), + .cs (dir_cs_i[dir_w]), + .we (dir_we_i[dir_w]), + .addr (dir_addr_i), + .wdata (dir_wentry_i[dir_w]), + .rdata (dir_rentry_o[dir_w]) + ); + end + + // Data + // + for (y = 0; y < int'(HPDCACHE_DATA_RAM_Y_CUTS); y++) begin : data_sram_row_gen + for (x = 0; x < int'(HPDCACHE_DATA_RAM_X_CUTS); x++) begin : data_sram_col_gen + if (HPDCACHE_DATA_RAM_WBYTEENABLE) begin : data_sram_wbyteenable_gen + hpdcache_sram_wbyteenable #( + .DATA_SIZE (HPDCACHE_DATA_RAM_WIDTH), + .ADDR_SIZE (HPDCACHE_DATA_RAM_ADDR_WIDTH) + ) data_sram ( + .clk (clk_i), + .rst_n (rst_ni), + .cs (data_cs_i[y][x]), + .we (data_we_i[y][x]), + .addr (data_addr_i[y][x]), + .wdata (data_wentry_i[y][x]), + .wbyteenable (data_wbyteenable_i[y][x]), + .rdata (data_rentry_o[y][x]) + ); + end else begin : data_sram_wmask_gen + hpdcache_data_ram_data_t data_wmask; + + // build the bitmask from the write byte enable signal + always_comb + 
begin : data_wmask_comb + for (int w = 0; w < HPDCACHE_DATA_WAYS_PER_RAM_WORD; w++) begin + for (int b = 0; b < HPDCACHE_WORD_WIDTH/8; b++) begin + data_wmask[w][8*b +: 8] = {8{data_wbyteenable_i[y][x][w][b]}}; + end + end + end + + hpdcache_sram_wmask #( + .DATA_SIZE (HPDCACHE_DATA_RAM_WIDTH), + .ADDR_SIZE (HPDCACHE_DATA_RAM_ADDR_WIDTH) + ) data_sram ( + .clk (clk_i), + .rst_n (rst_ni), + .cs (data_cs_i[y][x]), + .we (data_we_i[y][x]), + .addr (data_addr_i[y][x]), + .wdata (data_wentry_i[y][x]), + .wmask (data_wmask), + .rdata (data_rentry_o[y][x]) + ); + end + end + end + endgenerate + // }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_memctrl.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_memctrl.sv new file mode 100644 index 00000000000..e0949a2365f --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_memctrl.sv @@ -0,0 +1,656 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Directory and Data Memory RAMs Controller + * History : + */ +module hpdcache_memctrl +import hpdcache_pkg::*; + // Ports + // {{{ +( + // Global clock and reset signals + // {{{ + input logic clk_i, + input logic rst_ni, + // }}} + + // Global control signals + // {{{ + output logic ready_o, + // }}} + + // DIR array access interface + // {{{ + input logic dir_match_i, + input hpdcache_set_t dir_match_set_i, + input hpdcache_tag_t dir_match_tag_i, + input logic dir_update_lru_i, + output hpdcache_way_vector_t dir_hit_way_o, + + input logic dir_amo_match_i, + input hpdcache_set_t dir_amo_match_set_i, + input hpdcache_tag_t dir_amo_match_tag_i, + input logic dir_amo_update_plru_i, + output hpdcache_way_vector_t dir_amo_hit_way_o, + + input logic dir_refill_i, + input hpdcache_set_t dir_refill_set_i, + input hpdcache_dir_entry_t dir_refill_entry_i, + input logic dir_refill_updt_plru_i, + output hpdcache_way_vector_t dir_victim_way_o, + + input logic dir_cmo_check_i, + input hpdcache_set_t dir_cmo_check_set_i, + input hpdcache_tag_t dir_cmo_check_tag_i, + output hpdcache_way_vector_t dir_cmo_check_hit_way_o, + + input logic dir_cmo_inval_i, + input hpdcache_set_t dir_cmo_inval_set_i, + input hpdcache_way_vector_t dir_cmo_inval_way_i, + + // }}} + + // DATA array access interface + // {{{ + input logic data_req_read_i, + input hpdcache_set_t data_req_read_set_i, + input hpdcache_req_size_t data_req_read_size_i, + input hpdcache_word_t data_req_read_word_i, + output hpdcache_req_data_t data_req_read_data_o, + + input logic data_req_write_i, + input logic data_req_write_enable_i, + input hpdcache_set_t data_req_write_set_i, + input hpdcache_req_size_t data_req_write_size_i, + input hpdcache_word_t data_req_write_word_i, + input hpdcache_req_data_t 
data_req_write_data_i, + input hpdcache_req_be_t data_req_write_be_i, + + input logic data_amo_write_i, + input logic data_amo_write_enable_i, + input hpdcache_set_t data_amo_write_set_i, + input hpdcache_req_size_t data_amo_write_size_i, + input hpdcache_word_t data_amo_write_word_i, + input logic [63:0] data_amo_write_data_i, + input logic [7:0] data_amo_write_be_i, + + input logic data_refill_i, + input hpdcache_way_vector_t data_refill_way_i, + input hpdcache_set_t data_refill_set_i, + input hpdcache_word_t data_refill_word_i, + input hpdcache_refill_data_t data_refill_data_i + // }}} +); + // }}} + + // Definition of constants + // {{{ + localparam int unsigned HPDCACHE_ALL_CUTS = HPDCACHE_DATA_RAM_X_CUTS*HPDCACHE_DATA_RAM_Y_CUTS; + localparam int unsigned HPDCACHE_DATA_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS; + // }}} + + // Definition of functions + // {{{ + + // hpdcache_compute_data_ram_cs + // + // description: This function computes the chip-select signal for data + // RAMs depending on the request size and the word offset + function automatic hpdcache_data_row_enable_t hpdcache_compute_data_ram_cs( + input hpdcache_req_size_t size_i, + input hpdcache_word_t word_i); + + localparam hpdcache_uint32 off_width = + HPDCACHE_ACCESS_WORDS > 1 ? $clog2(HPDCACHE_ACCESS_WORDS) : 1; + + hpdcache_data_row_enable_t ret; + hpdcache_uint32 off; + + case (size_i) + 3'h0, + 3'h1, + 3'h2, + 3'h3: ret = hpdcache_data_row_enable_t'({ 64/HPDCACHE_WORD_WIDTH{1'b1}}); + 3'h4: ret = hpdcache_data_row_enable_t'({128/HPDCACHE_WORD_WIDTH{1'b1}}); + 3'h5: ret = hpdcache_data_row_enable_t'({256/HPDCACHE_WORD_WIDTH{1'b1}}); + default: ret = hpdcache_data_row_enable_t'({512/HPDCACHE_WORD_WIDTH{1'b1}}); + endcase + + off = HPDCACHE_ACCESS_WORDS > 1 ? hpdcache_uint'(word_i[0 +: off_width]) : 0; + return hpdcache_data_row_enable_t'(ret << off); + endfunction + + function automatic hpdcache_data_ram_row_idx_t hpdcache_way_to_data_ram_row( + input hpdcache_way_vector_t way); + for (hpdcache_uint i = 0; i < HPDCACHE_WAYS; i++) begin + if (way[i]) return hpdcache_data_ram_row_idx_t'(i / HPDCACHE_DATA_WAYS_PER_RAM_WORD); + end + return 0; + endfunction + + function automatic hpdcache_data_ram_way_idx_t hpdcache_way_to_data_ram_word( + input hpdcache_way_vector_t way); + for (hpdcache_uint i = 0; i < HPDCACHE_WAYS; i++) begin + if (way[i]) return hpdcache_data_ram_way_idx_t'(i % HPDCACHE_DATA_WAYS_PER_RAM_WORD); + end + return 0; + endfunction + + function automatic hpdcache_data_ram_addr_t hpdcache_set_to_data_ram_addr( + input hpdcache_set_t set, + input hpdcache_word_t word); + hpdcache_uint ret; + + ret = (hpdcache_uint'(set)*(HPDCACHE_CL_WORDS / HPDCACHE_ACCESS_WORDS)) + + (hpdcache_uint'(word) / HPDCACHE_ACCESS_WORDS); + + return hpdcache_data_ram_addr_t'(ret); + endfunction + // }}} + + // Definition of internal signals and registers + // {{{ + genvar gen_i, gen_j, gen_k; + + // Directory initialization signals and registers + logic init_q, init_d; + hpdcache_dir_addr_t init_set_q, init_set_d; + hpdcache_way_vector_t init_dir_cs; + hpdcache_way_vector_t init_dir_we; + hpdcache_dir_entry_t init_dir_wentry; + + // Directory valid bit vector (one bit per set and way) + hpdcache_way_vector_t [HPDCACHE_SETS-1:0] dir_valid_q, dir_valid_d; + hpdcache_set_t dir_req_set_q, dir_req_set_d; + hpdcache_dir_addr_t dir_addr; + hpdcache_way_vector_t dir_cs; + hpdcache_way_vector_t dir_we; + hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_wentry; + hpdcache_dir_entry_t [HPDCACHE_WAYS-1:0] dir_rentry; + + 
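    //  Data RAM access signals (shared by the core read/write, AMO write and
    //  refill write paths; see the data write request multiplexer below)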
hpdcache_data_addr_t data_addr; + hpdcache_data_enable_t data_cs; + hpdcache_data_enable_t data_we; + hpdcache_data_be_entry_t data_wbyteenable; + hpdcache_data_entry_t data_wentry; + hpdcache_data_entry_t data_rentry; + + logic data_write; + logic data_write_enable; + hpdcache_set_t data_write_set; + hpdcache_req_size_t data_write_size; + hpdcache_word_t data_write_word; + hpdcache_refill_data_t data_write_data; + hpdcache_refill_be_t data_write_be; + + hpdcache_refill_data_t data_req_write_data; + hpdcache_refill_be_t data_req_write_be; + + hpdcache_refill_data_t data_amo_write_data; + hpdcache_refill_be_t data_amo_write_be; + + hpdcache_way_vector_t data_way; + + hpdcache_data_ram_row_idx_t data_ram_row; + hpdcache_data_ram_way_idx_t data_ram_word; + + // }}} + + // Init FSM + // {{{ + always_comb + begin : init_comb + init_dir_wentry.tag = '0; + init_dir_wentry.reserved = '0; + init_dir_cs = '0; + init_dir_we = '0; + init_d = init_q; + init_set_d = init_set_q; + + case (init_q) + 1'b0: begin + init_d = (hpdcache_uint'(init_set_q) == (HPDCACHE_SETS - 1)); + init_set_d = init_set_q + 1; + init_dir_cs = '1; + init_dir_we = '1; + end + + 1'b1: begin + init_d = 1'b1; + init_set_d = init_set_q; + end + endcase + end + + assign ready_o = init_q; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : init_ff + if (!rst_ni) begin + init_q <= 1'b0; + init_set_q <= 0; + dir_valid_q <= '0; + end else begin + init_q <= init_d; + init_set_q <= init_set_d; + dir_valid_q <= dir_valid_d; + end + end + // }}} + + // Memory arrays + // {{{ + hpdcache_memarray hpdcache_memarray_i( + .clk_i, + .rst_ni, + + .dir_addr_i (dir_addr), + .dir_cs_i (dir_cs), + .dir_we_i (dir_we), + .dir_wentry_i (dir_wentry), + .dir_rentry_o (dir_rentry), + + .data_addr_i (data_addr), + .data_cs_i (data_cs), + .data_we_i (data_we), + .data_wbyteenable_i (data_wbyteenable), + .data_wentry_i (data_wentry), + .data_rentry_o (data_rentry) + ); + // }}} + + // Directory RAM request mux + // {{{ + always_comb + begin : dir_ctrl_comb + case (1'b1) + // Cache directory initialization + ~init_q: begin + dir_addr = init_set_q; + dir_cs = init_dir_cs; + dir_we = init_dir_we; + dir_wentry = {HPDCACHE_WAYS{init_dir_wentry}}; + end + + // Cache directory match tag -> hit + dir_match_i: begin + dir_addr = dir_match_set_i; + dir_cs = '1; + dir_we = '0; + dir_wentry = '0; + end + + // Cache directory AMO match tag -> hit + dir_amo_match_i: begin + dir_addr = dir_amo_match_set_i; + dir_cs = '1; + dir_we = '0; + dir_wentry = '0; + end + + // Cache directory update + dir_refill_i: begin + dir_addr = dir_refill_set_i; + dir_cs = dir_victim_way_o; + dir_we = dir_victim_way_o; + dir_wentry = {HPDCACHE_WAYS{dir_refill_entry_i}}; + end + + // Cache directory CMO match tag + dir_cmo_check_i: begin + dir_addr = dir_cmo_check_set_i; + dir_cs = '1; + dir_we = '0; + dir_wentry = '0; + end + + // Do nothing + default: begin + dir_addr = '0; + dir_cs = '0; + dir_we = '0; + dir_wentry = '0; + end + endcase + end + // }}} + + // Directory valid logic + // {{{ + always_comb + begin : dir_valid_comb + dir_valid_d = dir_valid_q; + + unique case (1'b1) + // Refill the cache after a miss + dir_refill_i: begin + dir_valid_d[dir_refill_set_i] = dir_valid_q[dir_refill_set_i] | dir_victim_way_o; + end + // CMO invalidate a set + dir_cmo_inval_i: begin + dir_valid_d[dir_cmo_inval_set_i] = dir_valid_q[dir_cmo_inval_set_i] & ~dir_cmo_inval_way_i; + end + default: begin + // do nothing + end + endcase + end + // }}} + + // Directory hit logic + // {{{ + assign 
dir_req_set_d = dir_match_i ? dir_match_set_i : + dir_amo_match_i ? dir_amo_match_set_i : + dir_cmo_check_i ? dir_cmo_check_set_i : + dir_req_set_q ; + + generate + hpdcache_way_vector_t req_hit; + hpdcache_way_vector_t amo_hit; + hpdcache_way_vector_t cmo_hit; + + for (gen_i = 0; gen_i < int'(HPDCACHE_WAYS); gen_i++) + begin : dir_match_tag_gen + assign req_hit[gen_i] = (dir_rentry[gen_i].tag == dir_match_tag_i), + amo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_amo_match_tag_i), + cmo_hit[gen_i] = (dir_rentry[gen_i].tag == dir_cmo_check_tag_i); + + assign dir_hit_way_o [gen_i] = dir_valid_q[dir_req_set_q][gen_i] & req_hit[gen_i], + dir_amo_hit_way_o [gen_i] = dir_valid_q[dir_req_set_q][gen_i] & amo_hit[gen_i], + dir_cmo_check_hit_way_o[gen_i] = dir_valid_q[dir_req_set_q][gen_i] & cmo_hit[gen_i]; + end + endgenerate + // }}} + + // Directory victim select logic + // {{{ + logic plru_updt; + hpdcache_way_vector_t plru_updt_way; + + assign plru_updt = dir_update_lru_i | dir_amo_update_plru_i, + plru_updt_way = dir_update_lru_i ? dir_hit_way_o : dir_amo_hit_way_o; + + hpdcache_plru #( + .SETS (HPDCACHE_SETS), + .WAYS (HPDCACHE_WAYS) + ) plru_i ( + .clk_i, + .rst_ni, + + .updt_i (plru_updt), + .updt_set_i (dir_req_set_q), + .updt_way_i (plru_updt_way), + + .repl_i (dir_refill_i), + .repl_set_i (dir_refill_set_i), + .repl_dir_valid_i (dir_valid_q[dir_refill_set_i]), + .repl_updt_plru_i (dir_refill_updt_plru_i), + + .victim_way_o (dir_victim_way_o) + ); + // }}} + + // Data RAM request multiplexor + // {{{ + + // Upsize the request interface to match the maximum access width of the data RAM + generate + if (HPDCACHE_DATA_REQ_RATIO > 1) begin : upsize_data_req_write_gen + // demux request DATA + assign data_req_write_data = {HPDCACHE_DATA_REQ_RATIO{data_req_write_data_i}}; + + // demux request BE + hpdcache_demux #( + .NOUTPUT (HPDCACHE_DATA_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH/8), + .ONE_HOT_SEL (1'b0) + ) data_req_write_be_demux_i ( + .data_i (data_req_write_be_i), + .sel_i (data_req_write_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +: + $clog2(HPDCACHE_DATA_REQ_RATIO)]), + .data_o (data_req_write_be) + ); + end else begin + assign data_req_write_data = data_req_write_data_i, + data_req_write_be = data_req_write_be_i; + end + endgenerate + + // Upsize the AMO data interface to match the maximum access width of the data RAM + generate + localparam hpdcache_uint AMO_DATA_RATIO = HPDCACHE_DATA_RAM_ACCESS_WIDTH/64; + localparam hpdcache_uint AMO_DATA_INDEX_WIDTH = $clog2(AMO_DATA_RATIO); + + if (AMO_DATA_RATIO > 1) begin + assign data_amo_write_data = {AMO_DATA_RATIO{data_amo_write_data_i}}; + + hpdcache_demux #( + .NOUTPUT (AMO_DATA_RATIO), + .DATA_WIDTH (8), + .ONE_HOT_SEL (1'b0) + ) amo_be_demux_i ( + .data_i (data_amo_write_be_i), + .sel_i (data_amo_write_word_i[0 +: AMO_DATA_INDEX_WIDTH]), + .data_o (data_amo_write_be) + ); + end else begin + assign data_amo_write_data = data_amo_write_data_i, + data_amo_write_be = data_amo_write_be_i; + end + endgenerate + + // Multiplex between data write requests + always_comb + begin : data_write_comb + case (1'b1) + data_refill_i: begin + data_write = 1'b1; + data_write_enable = 1'b1; + data_write_set = data_refill_set_i; + data_write_size = hpdcache_req_size_t'($clog2(HPDCACHE_DATA_RAM_ACCESS_WIDTH/8)); + data_write_word = data_refill_word_i; + data_write_data = data_refill_data_i; + data_write_be = '1; + end + + data_req_write_i: begin + data_write = 1'b1; + data_write_enable = data_req_write_enable_i; + data_write_set = data_req_write_set_i; + 
data_write_size = data_req_write_size_i; + data_write_word = data_req_write_word_i; + data_write_data = data_req_write_data; + data_write_be = data_req_write_be; + end + + data_amo_write_i: begin + data_write = 1'b1; + data_write_enable = data_amo_write_enable_i; + data_write_set = data_amo_write_set_i; + data_write_size = data_amo_write_size_i; + data_write_word = data_amo_write_word_i; + data_write_data = data_amo_write_data; + data_write_be = data_amo_write_be; + end + + default: begin + data_write = 1'b0; + data_write_enable = 1'b0; + data_write_set = '0; + data_write_size = '0; + data_write_word = '0; + data_write_data = '0; + data_write_be = '0; + end + endcase + end + + // Multiplex between read and write access on the data RAM + assign data_way = data_refill_i ? data_refill_way_i : + data_amo_write_i ? dir_amo_hit_way_o : + dir_hit_way_o; + + // Decode way index + assign data_ram_word = hpdcache_way_to_data_ram_word(data_way), + data_ram_row = hpdcache_way_to_data_ram_row(data_way); + + always_comb + begin : data_ctrl_comb + case (1'b1) + // Select data read inputs + data_req_read_i: begin + data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_req_read_set_i, + data_req_read_word_i)}}; + + data_we = '0; + data_wbyteenable = '0; + data_wentry = '0; + for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin + data_cs[i] = hpdcache_compute_data_ram_cs(data_req_read_size_i, + data_req_read_word_i); + end + end + + // Select data write inputs + data_write: begin + data_addr = {HPDCACHE_ALL_CUTS{hpdcache_set_to_data_ram_addr(data_write_set, + data_write_word)}}; + + for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin + for (int unsigned j = 0; j < HPDCACHE_DATA_RAM_X_CUTS; j++) begin + data_wentry[i][j] = {HPDCACHE_DATA_WAYS_PER_RAM_WORD{data_write_data[j]}}; + end + end + + for (int unsigned i = 0; i < HPDCACHE_DATA_RAM_Y_CUTS; i++) begin + data_cs[i] = hpdcache_compute_data_ram_cs(data_write_size, data_write_word); + + if (i == hpdcache_uint'(data_ram_row)) begin + data_we[i] = data_write_enable ? data_cs[i] : '0; + end else begin + data_we[i] = '0; + end + + // Build the write mask + for (int unsigned j = 0; j < HPDCACHE_ACCESS_WORDS; j++) begin + for (int unsigned k = 0; k < HPDCACHE_DATA_WAYS_PER_RAM_WORD; k++) begin + data_wbyteenable[i][j][k] = (k == hpdcache_uint'(data_ram_word)) ? 
+ data_write_be[j] : '0; + end + end + end + end + + // Do nothing + default: begin + data_addr = '0; + data_cs = '0; + data_we = '0; + data_wbyteenable = '0; + data_wentry = '0; + end + endcase + end + // }}} + + // Data RAM read data multiplexor + // {{{ + generate + hpdcache_req_data_t [HPDCACHE_DATA_REQ_RATIO-1:0][HPDCACHE_WAYS-1:0] data_read_words; + hpdcache_req_data_t [HPDCACHE_WAYS-1:0] data_read_req_word; + + // Organize the read data by words (all ways for the same word are contiguous) + for (gen_i = 0; gen_i < int'(HPDCACHE_DATA_REQ_RATIO); gen_i++) begin + for (gen_j = 0; gen_j < int'(HPDCACHE_WAYS); gen_j++) begin + for (gen_k = 0; gen_k < int'(HPDCACHE_REQ_WORDS); gen_k++) begin + assign data_read_words[gen_i][gen_j][gen_k] = + data_rentry[(gen_j / HPDCACHE_DATA_WAYS_PER_RAM_WORD)] + [(gen_i * HPDCACHE_REQ_WORDS ) + gen_k] + [(gen_j % HPDCACHE_DATA_WAYS_PER_RAM_WORD)]; + end + end + end + + // Mux the data according to the access word + if (HPDCACHE_DATA_REQ_RATIO > 1) begin : req_width_lt_ram_width + typedef logic [$clog2(HPDCACHE_DATA_REQ_RATIO)-1:0] data_req_word_t; + data_req_word_t data_read_req_word_index_q; + + hpdcache_mux #( + .NINPUT (HPDCACHE_DATA_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH*HPDCACHE_WAYS) + ) data_read_req_word_mux_i( + .data_i (data_read_words), + .sel_i (data_read_req_word_index_q), + .data_o (data_read_req_word) + ); + + always_ff @(posedge clk_i) + begin : data_req_read_word_ff + data_read_req_word_index_q <= + data_req_read_word_i[HPDCACHE_REQ_WORD_INDEX_WIDTH +: + $clog2(HPDCACHE_DATA_REQ_RATIO)]; + end + end + + // Request data interface width is equal to the data RAM width + else begin : req_width_eq_ram_width + assign data_read_req_word = data_read_words; + end + + // Mux the data according to the hit way + hpdcache_mux #( + .NINPUT (HPDCACHE_WAYS), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH), + .ONE_HOT_SEL (1'b1) + ) data_read_req_word_way_mux_i( + .data_i (data_read_req_word), + .sel_i (dir_hit_way_o), + .data_o (data_req_read_data_o) + ); + endgenerate + + + // Delay the accessed set for checking the tag from the directory in the + // next cycle (hit logic) + always_ff @(posedge clk_i) + begin : req_read_ff + if (dir_match_i || dir_amo_match_i || dir_cmo_check_i) begin + dir_req_set_q <= dir_req_set_d; + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + concurrent_dir_access_assert: assert property (@(posedge clk_i) + $onehot0({dir_match_i, dir_amo_match_i, dir_cmo_check_i, dir_refill_i})) else + $error("hpdcache_memctrl: more than one process is accessing the cache directory"); + + concurrent_data_access_assert: assert property (@(posedge clk_i) + $onehot0({data_req_read_i, data_req_write_i, data_amo_write_i, data_refill_i})) else + $error("hpdcache_memctrl: more than one process is accessing the cache data"); + // pragma translate_on + // }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_miss_handler.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_miss_handler.sv new file mode 100644 index 00000000000..cec5750dfdd --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_miss_handler.sv @@ -0,0 +1,673 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. 
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Miss Handler + * History : + */ +/* Possible improvements + * ===================== + * TODO Allow pipelining of the refilling operation. + * The pipelining would be as follows: + * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | + * | MSHR | WDATLO | WDATHI | | | | | + * | | NOP | MSHR | WDATLO | WDATHI | | | + * | | NOP | NOP | NOP | MSHR | WDATLO | WDATHI | + * + * This allows to handle 1 refill every 2 cycles. Otherwise, current implementation handles + * 1 refill every 3 cycles. + */ +module hpdcache_miss_handler +// {{{ +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_resp_r_t = logic, + localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0] +) +// }}} +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Global control signals + // {{{ + output logic mshr_empty_o, + output logic mshr_full_o, + // }}} + + // Configuration signals + // {{{ + input logic cfg_prefetch_updt_plru_i, + // }}} + + // CHECK interface + // {{{ + input logic mshr_check_i, + input mshr_set_t mshr_check_set_i, + input mshr_tag_t mshr_check_tag_i, + output logic mshr_check_hit_o, + // }}} + + // MISS interface + // {{{ + // MISS request interface + output logic mshr_alloc_ready_o, + input logic mshr_alloc_i, + input logic mshr_alloc_cs_i, + input hpdcache_nline_t mshr_alloc_nline_i, + output logic mshr_alloc_full_o, + input hpdcache_req_tid_t mshr_alloc_tid_i, + input hpdcache_req_sid_t mshr_alloc_sid_i, + input hpdcache_word_t mshr_alloc_word_i, + input logic mshr_alloc_need_rsp_i, + input logic mshr_alloc_is_prefetch_i, + + // REFILL MISS interface + input logic refill_req_ready_i, + output logic refill_req_valid_o, + output logic refill_busy_o, + output logic refill_updt_plru_o, + output hpdcache_set_t refill_set_o, + output hpdcache_dir_entry_t refill_dir_entry_o, + input hpdcache_way_vector_t refill_victim_way_i, + output logic refill_write_dir_o, + output logic refill_write_data_o, + output hpdcache_way_vector_t refill_victim_way_o, + output hpdcache_refill_data_t refill_data_o, + output hpdcache_word_t refill_word_o, + output hpdcache_nline_t refill_nline_o, + output logic refill_updt_rtab_o, + + // REFILL core response interface + output logic refill_core_rsp_valid_o, + output hpdcache_rsp_t refill_core_rsp_o, + // }}} + + // MEMORY interface + // {{{ + input logic mem_req_ready_i, + output logic mem_req_valid_o, + output hpdcache_mem_req_t mem_req_o, + + output logic mem_resp_ready_o, + input logic mem_resp_valid_i, + input hpdcache_mem_resp_r_t mem_resp_i + // }}} +); +// }}} + + // Declaration of constants and types + // {{{ + localparam int unsigned REFILL_REQ_RATIO = HPDCACHE_ACCESS_WORDS/HPDCACHE_REQ_WORDS; + + typedef enum logic { + MISS_REQ_IDLE = 1'b0, + MISS_REQ_SEND = 1'b1 + } miss_req_fsm_e; + + typedef enum { + REFILL_IDLE, + REFILL_WRITE, + REFILL_WRITE_DIR + } refill_fsm_e; + + typedef struct packed { + 
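        //  Metadata of a memory (refill) read response: the error status and
        //  the transaction ID. The ID encodes the MSHR set and way of the
        //  pending miss (see get_ack_mshr_set / get_ack_mshr_way below).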
hpdcache_mem_error_e r_error; + hpdcache_mem_id_t r_id; + } mem_resp_metadata_t; + + function automatic mshr_set_t get_ack_mshr_set(hpdcache_mem_id_t id); + return id[0 +: HPDCACHE_MSHR_SET_WIDTH]; + endfunction + + function automatic mshr_way_t get_ack_mshr_way(hpdcache_mem_id_t id); + return id[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_WAY_WIDTH]; + endfunction + // }}} + + // Declaration of internal signals and registers + // {{{ + miss_req_fsm_e miss_req_fsm_q, miss_req_fsm_d; + mshr_way_t mshr_alloc_way_q, mshr_alloc_way_d; + mshr_set_t mshr_alloc_set_q, mshr_alloc_set_d; + mshr_tag_t mshr_alloc_tag_q, mshr_alloc_tag_d; + + refill_fsm_e refill_fsm_q, refill_fsm_d; + hpdcache_set_t refill_set_q; + hpdcache_tag_t refill_tag_q; + hpdcache_way_vector_t refill_way_q; + hpdcache_req_sid_t refill_sid_q; + hpdcache_req_tid_t refill_tid_q; + hpdcache_word_t refill_cnt_q, refill_cnt_d; + logic refill_need_rsp_q; + logic refill_is_prefetch_q; + hpdcache_word_t refill_core_rsp_word_q; + logic refill_way_bypass; + + mem_resp_metadata_t refill_fifo_resp_meta_wdata, refill_fifo_resp_meta_rdata; + logic refill_fifo_resp_meta_w, refill_fifo_resp_meta_wok; + logic refill_fifo_resp_meta_r, refill_fifo_resp_meta_rok; + + logic refill_fifo_resp_data_w, refill_fifo_resp_data_wok; + hpdcache_refill_data_t refill_fifo_resp_data_rdata; + logic refill_fifo_resp_data_r; + + hpdcache_req_sid_t refill_core_rsp_sid_q, refill_core_rsp_sid_d; + hpdcache_req_tid_t refill_core_rsp_tid_q, refill_core_rsp_tid_d; + hpdcache_req_data_t refill_core_rsp_data_q, refill_core_rsp_data_d; + logic refill_core_rsp_error_q, refill_core_rsp_error_d; + logic refill_core_rsp_valid_q, refill_core_rsp_valid_d; + hpdcache_word_t refill_core_rsp_word; + logic refill_core_rsp_valid; + + logic refill_is_error; + + logic mshr_alloc; + logic mshr_alloc_cs; + logic mshr_ack; + logic mshr_ack_cs; + mshr_set_t mshr_ack_set; + mshr_way_t mshr_ack_way; + hpdcache_nline_t mshr_ack_nline; + hpdcache_req_sid_t mshr_ack_src_id; + hpdcache_req_tid_t mshr_ack_req_id; + hpdcache_word_t mshr_ack_word; + logic mshr_ack_need_rsp; + logic mshr_ack_is_prefetch; + logic mshr_empty; + // }}} + + // Miss Request FSM + // {{{ + always_comb + begin : miss_req_fsm_comb + mshr_alloc_ready_o = 1'b0; + mshr_alloc = 1'b0; + mshr_alloc_cs = 1'b0; + mem_req_valid_o = 1'b0; + + miss_req_fsm_d = miss_req_fsm_q; + + case (miss_req_fsm_q) + MISS_REQ_IDLE: begin + mshr_alloc_ready_o = 1'b1; + mshr_alloc = mshr_alloc_i; + mshr_alloc_cs = mshr_alloc_cs_i; + if (mshr_alloc_i) begin + miss_req_fsm_d = MISS_REQ_SEND; + end else begin + miss_req_fsm_d = MISS_REQ_IDLE; + end + end + MISS_REQ_SEND: begin + mem_req_valid_o = 1'b1; + if (mem_req_ready_i) begin + miss_req_fsm_d = MISS_REQ_IDLE; + end else begin + miss_req_fsm_d = MISS_REQ_SEND; + end + end + endcase + end + + localparam hpdcache_uint REFILL_REQ_SIZE = $clog2(HPDcacheMemDataWidth/8); + localparam hpdcache_uint REFILL_REQ_LEN = HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth; + + assign mem_req_o.mem_req_addr = {mshr_alloc_tag_q, mshr_alloc_set_q, {HPDCACHE_OFFSET_WIDTH{1'b0}} }, + mem_req_o.mem_req_len = hpdcache_mem_len_t'(REFILL_REQ_LEN-1), + mem_req_o.mem_req_size = hpdcache_mem_size_t'(REFILL_REQ_SIZE), + mem_req_o.mem_req_id = hpdcache_mem_id_t'({mshr_alloc_way_q, mshr_alloc_set_q}), + mem_req_o.mem_req_command = HPDCACHE_MEM_READ, + mem_req_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD, + mem_req_o.mem_req_cacheable = 1'b1; + + always_ff @(posedge clk_i) + begin : miss_req_fsm_internal_ff + if (mshr_alloc) begin + 
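            // Capture the set, way and tag of the newly allocated MSHR entry;
            // these registers form the address and the transaction ID of the
            // memory read request issued in the MISS_REQ_SEND state.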
mshr_alloc_way_q <= mshr_alloc_way_d; + mshr_alloc_set_q <= mshr_alloc_set_d; + mshr_alloc_tag_q <= mshr_alloc_tag_d; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : miss_req_fsm_ff + if (!rst_ni) begin + miss_req_fsm_q <= MISS_REQ_IDLE; + end else begin + miss_req_fsm_q <= miss_req_fsm_d; + end + end + // }}} + + // Refill FSM + // {{{ + + // ask permission to the refill arbiter if there is a pending refill + assign refill_req_valid_o = refill_fsm_q == REFILL_IDLE ? refill_fifo_resp_meta_rok : 1'b0; + + // forward the victim way directly from the victim selection logic or + // from the internal register + assign refill_victim_way_o = refill_way_bypass ? refill_victim_way_i : refill_way_q; + + always_comb + begin : miss_resp_fsm_comb + automatic hpdcache_uint REFILL_LAST_CHUNK_WORD; + REFILL_LAST_CHUNK_WORD = HPDCACHE_CL_WORDS - HPDCACHE_ACCESS_WORDS; + + refill_updt_plru_o = 1'b0; + refill_set_o = '0; + refill_write_dir_o = 1'b0; + refill_write_data_o = 1'b0; + refill_word_o = 0; + refill_data_o = '0; + refill_updt_rtab_o = 1'b0; + refill_cnt_d = refill_cnt_q; + refill_way_bypass = 1'b0; + + refill_core_rsp_valid = 1'b0; + refill_core_rsp_sid_d = refill_core_rsp_sid_q; + refill_core_rsp_tid_d = refill_core_rsp_tid_q; + refill_core_rsp_error_d = refill_core_rsp_error_q; + refill_core_rsp_word = 0; + + refill_fifo_resp_meta_r = 1'b0; + refill_fifo_resp_data_r = 1'b0; + + mshr_ack_cs = 1'b0; + mshr_ack = 1'b0; + + refill_fsm_d = refill_fsm_q; + + case (refill_fsm_q) + // Wait for refill responses + // {{{ + REFILL_IDLE: begin + if (refill_fifo_resp_meta_rok) begin + // anticipate the activation of the MSHR independently of the grant signal from + // the refill arbiter. This is to avoid the introduction of unnecessary timing + // paths (however there could be a minor augmentation of the power + // consumption). 
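                    //
                    // Only the MSHR chip-select is asserted speculatively; the
                    // entry itself is acknowledged and freed (mshr_ack) below,
                    // once the refill arbiter grants the refill.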
+ mshr_ack_cs = 1'b1; + + // if the permission is granted, start refilling + if (refill_req_ready_i) begin + refill_fsm_d = REFILL_WRITE; + + // read the MSHR and reset the valid bit for the + // corresponding entry + mshr_ack = 1'b1; + + // initialize the counter for refill words + refill_cnt_d = 0; + end + end + end + // }}} + + // Write refill data into the cache + // {{{ + REFILL_WRITE: begin + automatic logic is_prefetch; + + // Respond to the core (when needed) + if (refill_cnt_q == 0) begin + automatic hpdcache_uint _core_rsp_word; + _core_rsp_word = hpdcache_uint'(mshr_ack_word)/HPDCACHE_ACCESS_WORDS; + + if (mshr_ack_need_rsp) begin + refill_core_rsp_valid = (hpdcache_uint'(_core_rsp_word) == 0); + end + + refill_core_rsp_sid_d = mshr_ack_src_id; + refill_core_rsp_tid_d = mshr_ack_req_id; + refill_core_rsp_error_d = refill_is_error; + refill_core_rsp_word = hpdcache_word_t'( + hpdcache_uint'(mshr_ack_word)/HPDCACHE_REQ_WORDS); + end else begin + automatic hpdcache_uint _core_rsp_word; + _core_rsp_word = hpdcache_uint'(refill_core_rsp_word_q)/ + HPDCACHE_ACCESS_WORDS; + + if (refill_need_rsp_q) begin + automatic hpdcache_uint _refill_cnt; + _refill_cnt = hpdcache_uint'(refill_cnt_q)/HPDCACHE_ACCESS_WORDS; + refill_core_rsp_valid = (_core_rsp_word == _refill_cnt); + end + + refill_core_rsp_sid_d = refill_sid_q; + refill_core_rsp_tid_d = refill_tid_q; + refill_core_rsp_error_d = refill_is_error; + refill_core_rsp_word = hpdcache_word_t'( + hpdcache_uint'(refill_core_rsp_word_q)/HPDCACHE_REQ_WORDS); + end + + // Write the the data in the cache data array + if (refill_cnt_q == 0) begin + refill_set_o = mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH]; + refill_way_bypass = 1'b1; + is_prefetch = mshr_ack_is_prefetch; + end else begin + refill_set_o = refill_set_q; + refill_way_bypass = 1'b0; + is_prefetch = refill_is_prefetch_q; + end + refill_write_data_o = ~refill_is_error; + refill_data_o = refill_fifo_resp_data_rdata; + refill_word_o = refill_cnt_q; + + // Consume chunk of data from the FIFO buffer in the memory interface + refill_fifo_resp_data_r = 1'b1; + + // Update directory on the last chunk of data + refill_cnt_d = refill_cnt_q + hpdcache_word_t'(HPDCACHE_ACCESS_WORDS); + + if (hpdcache_uint'(refill_cnt_q) == REFILL_LAST_CHUNK_WORD) begin + if (REFILL_LAST_CHUNK_WORD == 0) begin + // Special case: if the cache-line data can be written in a single cycle, + // wait an additional cycle to write the directory. This allows to prevent + // a RAM-to-RAM timing path between the MSHR and the DIR. + refill_fsm_d = REFILL_WRITE_DIR; + end else begin + // Write the new entry in the cache directory + refill_write_dir_o = ~refill_is_error; + + // Update the PLRU bits. Only in the following cases: + // - There is no error in response AND + // - It is a prefetch and the cfg_prefetch_updt_plru_i is set OR + // - It is a read miss. 
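                        //
                        // In other words, refills triggered by hardware
                        // prefetches only promote the line in the PLRU state
                        // when cfg_prefetch_updt_plru_i is set; demand misses
                        // always do, unless the response carries an error.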
+ refill_updt_plru_o = ~refill_is_error & + (~is_prefetch | cfg_prefetch_updt_plru_i); + + // Update dependency flags in the retry table + refill_updt_rtab_o = 1'b1; + + // consume the response from the network + refill_fifo_resp_meta_r = 1'b1; + + refill_fsm_d = REFILL_IDLE; + end + end + end + // }}} + + // Write cache directory (this state is only visited when ACCESS_WORDS == CL_WORDS, + // this is when the entire cache-line can be written in a single cycle) + // {{{ + REFILL_WRITE_DIR: begin + automatic logic is_prefetch; + is_prefetch = refill_is_prefetch_q; + + // Select the target set and way + refill_set_o = refill_set_q; + refill_way_bypass = 1'b0; + + // Write the new entry in the cache directory + refill_write_dir_o = ~refill_is_error; + + // Update the PLRU bits. Only in the following cases: + // - There is no error in response AND + // - It is a prefetch and the cfg_prefetch_updt_plru_i is set OR + // - It is a read miss. + refill_updt_plru_o = ~refill_is_error & + (~is_prefetch | cfg_prefetch_updt_plru_i); + + // Update dependency flags in the retry table + refill_updt_rtab_o = 1'b1; + + // consume the response from the network + refill_fifo_resp_meta_r = 1'b1; + + refill_fsm_d = REFILL_IDLE; + end + // }}} + + default: begin + // pragma translate_off + $error("Illegal state"); + // pragma translate_on + end + endcase + end + + assign refill_is_error = (refill_fifo_resp_meta_rdata.r_error == HPDCACHE_MEM_RESP_NOK); + + assign refill_busy_o = (refill_fsm_q != REFILL_IDLE), + refill_nline_o = {refill_tag_q, refill_set_q}; + + assign mshr_ack_set = get_ack_mshr_set(refill_fifo_resp_meta_rdata.r_id), + mshr_ack_way = get_ack_mshr_way(refill_fifo_resp_meta_rdata.r_id); + + assign refill_dir_entry_o.tag = refill_tag_q, + refill_dir_entry_o.reserved = '0; + + assign refill_core_rsp_valid_d = ~refill_core_rsp_valid_q & refill_core_rsp_valid; + + assign refill_core_rsp_valid_o = refill_core_rsp_valid_q, + refill_core_rsp_o.rdata = refill_core_rsp_data_q, + refill_core_rsp_o.sid = refill_core_rsp_sid_q, + refill_core_rsp_o.tid = refill_core_rsp_tid_q, + refill_core_rsp_o.error = refill_core_rsp_error_q, + refill_core_rsp_o.aborted = 1'b0; + + generate + // refill's width is bigger than the width of the core's interface + if (REFILL_REQ_RATIO > 1) begin : core_rsp_data_mux_gen + hpdcache_mux #( + .NINPUT (REFILL_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH) + ) data_read_rsp_mux_i( + .data_i (refill_data_o), + .sel_i (refill_core_rsp_word[0 +: $clog2(REFILL_REQ_RATIO)]), + .data_o (refill_core_rsp_data_d) + ); + end + + // refill's width is equal to the width of the core's interface + else begin + assign refill_core_rsp_data_d = refill_data_o; + end + endgenerate + + /* FIXME: when multiple chunks, in case of error, the error bit is not + * necessarily set on all chunks */ + assign refill_fifo_resp_meta_wdata = '{ + r_error: mem_resp_i.mem_resp_r_error, + r_id : mem_resp_i.mem_resp_r_id + }; + + hpdcache_fifo_reg #( + .FIFO_DEPTH (2), + .fifo_data_t (mem_resp_metadata_t) + ) i_r_metadata_fifo ( + .clk_i, + .rst_ni, + + .w_i (refill_fifo_resp_meta_w), + .wok_o (refill_fifo_resp_meta_wok), + .wdata_i(refill_fifo_resp_meta_wdata), + + .r_i (refill_fifo_resp_meta_r), + .rok_o (refill_fifo_resp_meta_rok), + .rdata_o(refill_fifo_resp_meta_rdata) + ); + + generate + if (HPDcacheMemDataWidth < HPDCACHE_REFILL_DATA_WIDTH) begin + hpdcache_data_upsize #( + .WR_WIDTH(HPDcacheMemDataWidth), + .RD_WIDTH(HPDCACHE_REFILL_DATA_WIDTH), + .DEPTH(2*(HPDCACHE_CL_WIDTH/HPDCACHE_REFILL_DATA_WIDTH)) + ) 
i_rdata_upsize ( + .clk_i, + .rst_ni, + + .w_i (refill_fifo_resp_data_w), + .wlast_i (mem_resp_i.mem_resp_r_last), + .wok_o (refill_fifo_resp_data_wok), + .wdata_i (mem_resp_i.mem_resp_r_data), + + .r_i (refill_fifo_resp_data_r), + .rok_o (/* unused */), + .rdata_o (refill_fifo_resp_data_rdata) + ); + end else if (HPDcacheMemDataWidth > HPDCACHE_REFILL_DATA_WIDTH) begin + hpdcache_data_downsize #( + .WR_WIDTH(HPDcacheMemDataWidth), + .RD_WIDTH(HPDCACHE_REFILL_DATA_WIDTH), + .DEPTH(2*(HPDCACHE_CL_WIDTH/HPDcacheMemDataWidth)) + ) i_rdata_downsize ( + .clk_i, + .rst_ni, + + .w_i (refill_fifo_resp_data_w), + .wok_o (refill_fifo_resp_data_wok), + .wdata_i (mem_resp_i.mem_resp_r_data), + + .r_i (refill_fifo_resp_data_r), + .rok_o (/* unused */), + .rdata_o (refill_fifo_resp_data_rdata) + ); + end else begin + hpdcache_fifo_reg #( + .FIFO_DEPTH (2), + .fifo_data_t (hpdcache_refill_data_t) + ) i_rdata_fifo ( + .clk_i, + .rst_ni, + + .w_i (refill_fifo_resp_data_w), + .wok_o (refill_fifo_resp_data_wok), + .wdata_i (mem_resp_i.mem_resp_r_data), + + .r_i (refill_fifo_resp_data_r), + .rok_o (/* unused */), + .rdata_o (refill_fifo_resp_data_rdata) + ); + end + endgenerate + + assign refill_fifo_resp_data_w = mem_resp_valid_i & + (refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last), + refill_fifo_resp_meta_w = mem_resp_valid_i & + (refill_fifo_resp_data_wok & mem_resp_i.mem_resp_r_last), + mem_resp_ready_o = refill_fifo_resp_data_wok & + (refill_fifo_resp_meta_wok | ~mem_resp_i.mem_resp_r_last); + + always_ff @(posedge clk_i or negedge rst_ni) + begin : miss_resp_fsm_ff + if (!rst_ni) begin + refill_fsm_q <= REFILL_IDLE; + refill_core_rsp_valid_q <= 1'b0; + end else begin + refill_fsm_q <= refill_fsm_d; + refill_core_rsp_valid_q <= refill_core_rsp_valid_d; + end + end + + always_ff @(posedge clk_i) + begin : miss_resp_fsm_internal_ff + if ((refill_fsm_q == REFILL_WRITE) && (refill_cnt_q == 0)) begin + refill_set_q <= mshr_ack_nline[0 +: HPDCACHE_SET_WIDTH]; + refill_tag_q <= mshr_ack_nline[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH];; + refill_way_q <= refill_victim_way_i; + refill_sid_q <= mshr_ack_src_id; + refill_tid_q <= mshr_ack_req_id; + refill_need_rsp_q <= mshr_ack_need_rsp; + refill_is_prefetch_q <= mshr_ack_is_prefetch; + refill_core_rsp_word_q <= mshr_ack_word; + end + refill_cnt_q <= refill_cnt_d; + end + + always_ff @(posedge clk_i) + begin : core_rsp_ff + if (!refill_core_rsp_valid_q && refill_core_rsp_valid) begin + refill_core_rsp_sid_q <= refill_core_rsp_sid_d; + refill_core_rsp_tid_q <= refill_core_rsp_tid_d; + refill_core_rsp_data_q <= refill_core_rsp_data_d; + refill_core_rsp_error_q <= refill_core_rsp_error_d; + end + end + // }}} + + // Miss Status Holding Register component + // {{{ + hpdcache_mshr hpdcache_mshr_i ( + .clk_i, + .rst_ni, + + .empty_o (mshr_empty), + .full_o (mshr_full_o), + + .check_i (mshr_check_i), + .check_set_i (mshr_check_set_i), + .check_tag_i (mshr_check_tag_i), + .hit_o (mshr_check_hit_o), + .alloc_i (mshr_alloc), + .alloc_cs_i (mshr_alloc_cs), + .alloc_nline_i (mshr_alloc_nline_i), + .alloc_req_id_i (mshr_alloc_tid_i), + .alloc_src_id_i (mshr_alloc_sid_i), + .alloc_word_i (mshr_alloc_word_i), + .alloc_need_rsp_i (mshr_alloc_need_rsp_i), + .alloc_is_prefetch_i (mshr_alloc_is_prefetch_i), + .alloc_full_o (mshr_alloc_full_o), + .alloc_set_o (mshr_alloc_set_d), + .alloc_tag_o (mshr_alloc_tag_d), + .alloc_way_o (mshr_alloc_way_d), + + .ack_i (mshr_ack), + .ack_cs_i (mshr_ack_cs), + .ack_set_i (mshr_ack_set), + .ack_way_i (mshr_ack_way), + .ack_req_id_o 
(mshr_ack_req_id), + .ack_src_id_o (mshr_ack_src_id), + .ack_nline_o (mshr_ack_nline), + .ack_word_o (mshr_ack_word), + .ack_need_rsp_o (mshr_ack_need_rsp), + .ack_is_prefetch_o (mshr_ack_is_prefetch) + ); + + // Indicate to the cache controller that there is no pending miss. This + // is, when the MSHR is empty, and the MISS handler has finished of + // processing the last miss response. + assign mshr_empty_o = mshr_empty & ~refill_busy_o; + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial assert (HPDcacheMemIdWidth >= (HPDCACHE_MSHR_SET_WIDTH + HPDCACHE_MSHR_WAY_WIDTH)) else + $error("miss_handler: not enough ID bits in the memory interface"); + // pragma translate_on + // }}} + +endmodule +// }}} diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_mshr.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_mshr.sv new file mode 100644 index 00000000000..f63e408f38d --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_mshr.sv @@ -0,0 +1,385 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Miss Status Holding Register (MSHR) + * History : + */ +module hpdcache_mshr +import hpdcache_pkg::*; + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, + output logic full_o, + + // Check and allocation interface + input logic check_i, + input mshr_set_t check_set_i, + input mshr_tag_t check_tag_i, + output logic hit_o, + input logic alloc_i, + input logic alloc_cs_i, + input hpdcache_nline_t alloc_nline_i, + input hpdcache_req_tid_t alloc_req_id_i, + input hpdcache_req_sid_t alloc_src_id_i, + input hpdcache_word_t alloc_word_i, + input logic alloc_need_rsp_i, + input logic alloc_is_prefetch_i, + output logic alloc_full_o, + output mshr_set_t alloc_set_o, + output mshr_tag_t alloc_tag_o, + output mshr_way_t alloc_way_o, + + // Acknowledge interface + input logic ack_i, + input logic ack_cs_i, + input mshr_set_t ack_set_i, + input mshr_way_t ack_way_i, + output hpdcache_req_tid_t ack_req_id_o, + output hpdcache_req_sid_t ack_src_id_o, + output hpdcache_nline_t ack_nline_o, + output hpdcache_word_t ack_word_o, + output logic ack_need_rsp_o, + output logic ack_is_prefetch_o +); + // }}} + + // Definition of constants and types + // {{{ + typedef struct packed { + mshr_tag_t tag; + hpdcache_req_tid_t req_id; + hpdcache_req_sid_t src_id; + hpdcache_word_t word_idx; + logic need_rsp; + logic is_prefetch; + } mshr_entry_t; + + + // Compute the width of MSHR entries depending on the support of write + // bitmask or not (write byte enable) + localparam int unsigned HPDCACHE_MSHR_ENTRY_BITS = $bits(mshr_entry_t); + + localparam int unsigned HPDCACHE_MSHR_RAM_ENTRY_BITS = + HPDCACHE_MSHR_RAM_WBYTEENABLE ? 
+ ((HPDCACHE_MSHR_ENTRY_BITS + 7)/8) * 8 : // align to 8 bits + HPDCACHE_MSHR_ENTRY_BITS; // or use the exact number of bits + + typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_data_t; + // }}} + + // Definition of internal wires and registers + // {{{ + logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_q, mshr_valid_d; + mshr_set_t check_set_q; + mshr_set_t alloc_set; + mshr_tag_t alloc_tag; + hpdcache_set_t alloc_dcache_set; + mshr_way_t ack_way_q; + mshr_set_t ack_set_q; + hpdcache_set_t ack_dcache_set; + hpdcache_tag_t ack_dcache_tag; + + logic [HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS-1:0] mshr_valid_set, mshr_valid_rst; + mshr_entry_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wentry; + mshr_sram_data_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wdata; + mshr_entry_t [HPDCACHE_MSHR_WAYS-1:0] mshr_rentry; + mshr_sram_data_t [HPDCACHE_MSHR_WAYS-1:0] mshr_rdata; + + logic mshr_we; + logic mshr_cs; + mshr_set_t mshr_addr; + logic check; + // }}} + + // Control part for the allocation and check operations + // {{{ + + // The allocation operation is prioritary with respect to the check operation + assign check = check_i & ~alloc_i; + + assign alloc_set = alloc_nline_i[0 +: HPDCACHE_MSHR_SET_WIDTH], + alloc_tag = alloc_nline_i[HPDCACHE_MSHR_SET_WIDTH +: HPDCACHE_MSHR_TAG_WIDTH], + alloc_dcache_set = alloc_nline_i[0 +: HPDCACHE_SET_WIDTH]; + + // Look for an available way in case of allocation + always_comb + begin + automatic mshr_way_t found_available_way; + + found_available_way = 0; + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + if (!mshr_valid_q[i*HPDCACHE_MSHR_SETS + int'(alloc_set)]) begin + found_available_way = mshr_way_t'(i); + break; + end + end + alloc_way_o = found_available_way; + end + + // Look if the mshr can accept the checked nline (in case of allocation) + always_comb + begin + automatic bit found_available; + + found_available = 1'b0; + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + if (!mshr_valid_q[i*HPDCACHE_MSHR_SETS + int'(check_set_q)]) begin + found_available = 1'b1; + break; + end + end + alloc_full_o = ~found_available; + end + + assign alloc_set_o = alloc_set, + alloc_tag_o = alloc_tag; + + // Write when there is an allocation operation + assign mshr_we = alloc_i; + + // HPDcache SET to MSHR SET translation table + hpdcache_mshr_to_cache_set trlt_i ( + .clk_i, + .write_i (mshr_we), + .write_dcache_set_i (alloc_dcache_set), + .write_mshr_way_i (alloc_way_o), + .read_mshr_set_i (ack_set_q), + .read_mshr_way_i (ack_way_q), + .read_dcache_set_o (ack_dcache_set) + ); + + + // Generate write data and mask depending on the available way + always_comb + begin + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + mshr_wentry[i].tag = alloc_tag; + mshr_wentry[i].req_id = alloc_req_id_i; + mshr_wentry[i].src_id = alloc_src_id_i; + mshr_wentry[i].word_idx = alloc_word_i; + mshr_wentry[i].need_rsp = alloc_need_rsp_i; + mshr_wentry[i].is_prefetch = alloc_is_prefetch_i; + end + end + // }}} + + // Shared control signals + // {{{ + assign mshr_cs = check_i | alloc_cs_i | ack_cs_i; + assign mshr_addr = ack_i ? ack_set_i : + (alloc_i ? 
alloc_set : check_set_i); + + always_comb + begin : mshr_valid_comb + automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_alloc_slot; + automatic logic unsigned [HPDCACHE_MSHR_WAY_WIDTH+HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_ack_slot; + + mshr_alloc_slot = {alloc_way_o, alloc_set}; + mshr_ack_slot = { ack_way_i, ack_set_i}; + + for (int unsigned i = 0; i < HPDCACHE_MSHR_SETS*HPDCACHE_MSHR_WAYS; i++) begin + mshr_valid_rst[i] = (i == hpdcache_uint'(mshr_ack_slot)) ? ack_i : 1'b0; + mshr_valid_set[i] = (i == hpdcache_uint'(mshr_alloc_slot)) ? alloc_i : 1'b0; + end + end + assign mshr_valid_d = (~mshr_valid_q & mshr_valid_set) | (mshr_valid_q & ~mshr_valid_rst); + // }}} + + // Read interface (ack) + // {{{ + generate + // extract HPDcache tag from the MSb of the MSHT TAG + if (HPDCACHE_SETS >= HPDCACHE_MSHR_SETS) begin : ack_dcache_set_ge_mshr_set_gen + assign ack_dcache_tag = mshr_rentry[ack_way_q].tag[ + HPDCACHE_MSHR_TAG_WIDTH - 1 : + HPDCACHE_MSHR_TAG_WIDTH - HPDCACHE_TAG_WIDTH]; + end + + // extract HPDcache tag from MSb of the MSHR set concatenated with the MSHR tag + else begin : ack_dcache_set_lt_mshr_set_gen + assign ack_dcache_tag = { + mshr_rentry[ack_way_q].tag , + ack_set_q[HPDCACHE_MSHR_SET_WIDTH - 1:HPDCACHE_SET_WIDTH]}; + end + endgenerate + + assign ack_req_id_o = mshr_rentry[ack_way_q].req_id, + ack_src_id_o = mshr_rentry[ack_way_q].src_id, + ack_nline_o = {ack_dcache_tag, ack_dcache_set}, + ack_word_o = mshr_rentry[ack_way_q].word_idx, + ack_need_rsp_o = mshr_rentry[ack_way_q].need_rsp, + ack_is_prefetch_o = mshr_rentry[ack_way_q].is_prefetch; + // }}} + + // Global control signals + // {{{ + assign empty_o = ~|mshr_valid_q; + assign full_o = &mshr_valid_q; + + always_comb + begin : hit_comb + automatic bit [HPDCACHE_MSHR_WAYS-1:0] __hit_way; + + for (int unsigned w = 0; w < HPDCACHE_MSHR_WAYS; w++) begin + automatic bit __valid; + automatic bit __match; + __valid = mshr_valid_q[w*HPDCACHE_MSHR_SETS + int'(check_set_q)]; + __match = (mshr_rentry[w].tag == check_tag_i); + __hit_way[w] = (__valid && __match); + end + + hit_o = |__hit_way; + end + // }}} + + // Internal state assignment + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin : mshr_ff_set + if (!rst_ni) begin + mshr_valid_q <= '0; + ack_way_q <= '0; + ack_set_q <= '0; + check_set_q <= '0; + end else begin + mshr_valid_q <= mshr_valid_d; + if (ack_i) begin + ack_way_q <= ack_way_i; + ack_set_q <= ack_set_i; + end + if (check) begin + check_set_q <= check_set_i; + end + end + end + // }}} + + // Internal components + // {{{ + generate + if (HPDCACHE_MSHR_RAM_WBYTEENABLE) begin : mshr_wbyteenable_gen + typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS/8-1:0] mshr_sram_wbyteenable_t; + mshr_sram_wbyteenable_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wbyteenable; + + always_comb + begin : mshr_wbyteenable_comb + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + mshr_wbyteenable[i] = (int'(alloc_way_o) == i) ? 
'1 : '0; + end + end + + if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen + hpdcache_regbank_wbyteenable_1rw #( + .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS), + .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH) + ) mshr_mem( + .clk (clk_i), + .rst_n (rst_ni), + .cs (mshr_cs), + .we (mshr_we), + .addr (mshr_addr), + .wbyteenable (mshr_wbyteenable), + .wdata (mshr_wdata), + .rdata (mshr_rdata) + ); + end else begin : mshr_sram_gen + hpdcache_sram_wbyteenable #( + .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS), + .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH) + ) mshr_mem( + .clk (clk_i), + .rst_n (rst_ni), + .cs (mshr_cs), + .we (mshr_we), + .addr (mshr_addr), + .wbyteenable (mshr_wbyteenable), + .wdata (mshr_wdata), + .rdata (mshr_rdata) + ); + end + end else begin : mshr_wmask_gen + typedef logic [HPDCACHE_MSHR_RAM_ENTRY_BITS-1:0] mshr_sram_wmask_t; + mshr_sram_wmask_t [HPDCACHE_MSHR_WAYS-1:0] mshr_wmask; + + always_comb + begin : mshr_wmask_comb + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + mshr_wmask[i] = (int'(alloc_way_o) == i) ? '1 : '0; + end + end + + if (HPDCACHE_MSHR_USE_REGBANK) begin : mshr_regbank_gen + hpdcache_regbank_wmask_1rw #( + .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS), + .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH) + ) mshr_mem( + .clk (clk_i), + .rst_n (rst_ni), + .cs (mshr_cs), + .we (mshr_we), + .addr (mshr_addr), + .wmask (mshr_wmask), + .wdata (mshr_wdata), + .rdata (mshr_rdata) + ); + end else begin : mshr_sram_gen + hpdcache_sram_wmask #( + .DATA_SIZE (HPDCACHE_MSHR_WAYS*HPDCACHE_MSHR_RAM_ENTRY_BITS), + .ADDR_SIZE (HPDCACHE_MSHR_SET_WIDTH) + ) mshr_mem( + .clk (clk_i), + .rst_n (rst_ni), + .cs (mshr_cs), + .we (mshr_we), + .addr (mshr_addr), + .wmask (mshr_wmask), + .wdata (mshr_wdata), + .rdata (mshr_rdata) + ); + end + end + endgenerate + + always_comb + begin : ram_word_fitting_comb + for (int unsigned i = 0; i < HPDCACHE_MSHR_WAYS; i++) begin + mshr_wdata[i] = mshr_sram_data_t'(mshr_wentry[i]); + mshr_rentry[i] = mshr_entry_t'(mshr_rdata[i][0 +: HPDCACHE_MSHR_ENTRY_BITS]); + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + one_command_assert: assert property (@(posedge clk_i) + (ack_i -> !(alloc_i || check_i))) else + $error("MSHR: ack with concurrent alloc or check"); + // pragma translate_on + // }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv new file mode 100644 index 00000000000..3dc8b73af55 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_mshr_to_cache_set.sv @@ -0,0 +1,105 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache MSHR set translation table + * History : + */ +module hpdcache_mshr_to_cache_set +import hpdcache_pkg::*; +// Ports +// {{{ +( + // Clock signals + input logic clk_i, + + // Write interface + input logic write_i, + input hpdcache_set_t write_dcache_set_i, + input mshr_way_t write_mshr_way_i, + + // Read interface + input mshr_way_t read_mshr_way_i, + input mshr_set_t read_mshr_set_i, + output hpdcache_set_t read_dcache_set_o +); +// }}} + // + + generate + // Number of HPDcache sets is bigger than the MSHR sets + // In this case, a translation table (in flip-flops) is needed + // {{{ + // Write most significant bits of the HPDcache set into the + // translation table + if (HPDCACHE_SETS > HPDCACHE_MSHR_SETS) begin : hpdcache_sets_gt_mshr_sets_gen + localparam hpdcache_uint TRLT_TAB_ENTRY_WIDTH = + HPDCACHE_SET_WIDTH - HPDCACHE_MSHR_SET_WIDTH; + typedef logic [TRLT_TAB_ENTRY_WIDTH-1:0] trlt_entry_t; + + + // Translation table + // + // This table is used to store the most significant bits of the HPDcache set + trlt_entry_t [HPDCACHE_MSHR_SETS-1:0][HPDCACHE_MSHR_WAYS-1:0] tab; + trlt_entry_t tab_wdata; + mshr_set_t write_mshr_set; + + // Write operation + // {{{ + // Write most significant bits of the HPDcache set into the + // translation table + always_ff @(posedge clk_i) + begin + if (write_i) begin + tab[write_mshr_set][write_mshr_way_i] <= tab_wdata; + end + end + + assign tab_wdata = write_dcache_set_i[HPDCACHE_MSHR_SET_WIDTH +: + TRLT_TAB_ENTRY_WIDTH], + write_mshr_set = write_dcache_set_i[0 +: HPDCACHE_MSHR_SET_WIDTH]; + // }}} + + // Read operation + // {{{ + // Concatenate the mshr set with the most significant bits of the + // dcache set stored in the translation table + assign read_dcache_set_o = {tab[read_mshr_set_i][read_mshr_way_i], read_mshr_set_i}; + // }}} + end + // }}} + + // Number of HPDcache sets is smaller or equal than the MSHR sets + // In this case, no translation table is needed + // {{{ + else begin : hpdcache_sets_le_mshr_sets_gen + assign read_dcache_set_o = hpdcache_set_t'(read_mshr_set_i); + end + // }}} + endgenerate + +// Assertions +// {{{ +// pragma translate_off +// pragma translate_on +// }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_pkg.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_pkg.sv new file mode 100755 index 00000000000..81db44d0871 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_pkg.sv @@ -0,0 +1,613 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Write-Through (WT), High-Throughput (HTPUT) HPDcache Package + * History : + */ +package hpdcache_pkg; + // Definition of global constants for the HPDcache data and directory + // {{{ + + // HPDcache physical address width (bits) + localparam int unsigned HPDCACHE_PA_WIDTH = hpdcache_params_pkg::PARAM_PA_WIDTH; + + // HPDcache number of sets + localparam int unsigned HPDCACHE_SETS = hpdcache_params_pkg::PARAM_SETS; + + // HPDcache number of ways + localparam int unsigned HPDCACHE_WAYS = hpdcache_params_pkg::PARAM_WAYS; + + // HPDcache word width (bits) + localparam int unsigned HPDCACHE_WORD_WIDTH = hpdcache_params_pkg::PARAM_WORD_WIDTH; + + // HPDcache cache-line width (bits) + localparam int unsigned HPDCACHE_CL_WORDS = hpdcache_params_pkg::PARAM_CL_WORDS; + + // HPDcache number of words in the request data channels (request and response) + localparam int unsigned HPDCACHE_REQ_WORDS = hpdcache_params_pkg::PARAM_REQ_WORDS; + + // HPDcache request transaction ID width (bits) + localparam int unsigned HPDCACHE_REQ_TRANS_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_TRANS_ID_WIDTH; + + // HPDcache request source ID width (bits) + localparam int unsigned HPDCACHE_REQ_SRC_ID_WIDTH = hpdcache_params_pkg::PARAM_REQ_SRC_ID_WIDTH; + // }}} + + // Utility definitions + // {{{ + typedef logic unsigned [31:0] hpdcache_uint; + typedef logic signed [31:0] hpdcache_int; + typedef logic unsigned [31:0] hpdcache_uint32; + typedef logic signed [31:0] hpdcache_int32; + typedef logic unsigned [63:0] hpdcache_uint64; + typedef logic signed [63:0] hpdcache_int64; + // }}} + + // Definition of constants and types for HPDcache directory memory + // {{{ + localparam int unsigned HPDCACHE_CL_WIDTH = HPDCACHE_CL_WORDS*HPDCACHE_WORD_WIDTH; + localparam int unsigned HPDCACHE_OFFSET_WIDTH = $clog2(HPDCACHE_CL_WIDTH/8); + localparam int unsigned HPDCACHE_NLINE_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_OFFSET_WIDTH; + localparam int unsigned HPDCACHE_SET_WIDTH = $clog2(HPDCACHE_SETS); + localparam int unsigned HPDCACHE_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_SET_WIDTH; + localparam int unsigned HPDCACHE_WORD_IDX_WIDTH = $clog2(HPDCACHE_CL_WORDS); + + typedef logic unsigned [ HPDCACHE_OFFSET_WIDTH-1:0] hpdcache_offset_t; + typedef logic unsigned [ HPDCACHE_NLINE_WIDTH-1:0] hpdcache_nline_t; + typedef logic unsigned [ HPDCACHE_SET_WIDTH-1:0] hpdcache_set_t; + typedef logic unsigned [ HPDCACHE_TAG_WIDTH-1:0] hpdcache_tag_t; + typedef logic unsigned [ $clog2(HPDCACHE_WAYS)-1:0] hpdcache_way_t; + typedef logic unsigned [ HPDCACHE_WAYS-1:0] hpdcache_way_vector_t; + typedef logic unsigned [HPDCACHE_WORD_IDX_WIDTH-1:0] hpdcache_word_t; + + typedef struct packed { + hpdcache_tag_t tag; + logic [1:0] reserved; + } hpdcache_dir_entry_t; + + localparam int unsigned HPDCACHE_DIR_RAM_WIDTH = $bits(hpdcache_dir_entry_t); + localparam int unsigned HPDCACHE_DIR_RAM_DEPTH = HPDCACHE_SETS; + localparam int unsigned HPDCACHE_DIR_RAM_ADDR_WIDTH = $clog2(HPDCACHE_DIR_RAM_DEPTH); + + typedef logic [HPDCACHE_DIR_RAM_ADDR_WIDTH-1:0] hpdcache_dir_addr_t; + + function automatic hpdcache_way_t hpdcache_way_vector_to_index(input hpdcache_way_vector_t way); + for (int unsigned i = 0; i < HPDCACHE_WAYS; i++) begin + if (way[i]) return hpdcache_way_t'(i); + end + return 0; + endfunction + + // }}} + + // Definition of constants and types for HPDcache data memory + // {{{ + localparam int unsigned HPDCACHE_DATA_WAYS_PER_RAM_WORD = + 
hpdcache_params_pkg::PARAM_DATA_WAYS_PER_RAM_WORD; + + localparam int unsigned HPDCACHE_DATA_SETS_PER_RAM = /* FIXME this parameter is currently ignored */ + hpdcache_params_pkg::PARAM_DATA_SETS_PER_RAM; + + // HPDcache DATA RAM implements write byte enable + localparam bit HPDCACHE_DATA_RAM_WBYTEENABLE = + hpdcache_params_pkg::PARAM_DATA_RAM_WBYTEENABLE; + + // Define the number of memory contiguous words that can be accessed + // simultaneously from the cache. + // - This limits the maximum width for the data channel from requesters + // - This impacts the refill latency + localparam int unsigned HPDCACHE_ACCESS_WORDS = hpdcache_params_pkg::PARAM_ACCESS_WORDS; + + + localparam int unsigned HPDCACHE_DATA_RAM_WIDTH = + HPDCACHE_DATA_WAYS_PER_RAM_WORD*HPDCACHE_WORD_WIDTH; + localparam int unsigned HPDCACHE_DATA_RAM_Y_CUTS = HPDCACHE_WAYS/HPDCACHE_DATA_WAYS_PER_RAM_WORD; + localparam int unsigned HPDCACHE_DATA_RAM_X_CUTS = HPDCACHE_ACCESS_WORDS; + localparam int unsigned HPDCACHE_DATA_RAM_ACCESS_WIDTH = HPDCACHE_ACCESS_WORDS*HPDCACHE_WORD_WIDTH; + localparam int unsigned HPDCACHE_DATA_RAM_ENTR_PER_SET = HPDCACHE_CL_WORDS/HPDCACHE_ACCESS_WORDS; + localparam int unsigned HPDCACHE_DATA_RAM_DEPTH = HPDCACHE_SETS*HPDCACHE_DATA_RAM_ENTR_PER_SET; + localparam int unsigned HPDCACHE_DATA_RAM_ADDR_WIDTH = $clog2(HPDCACHE_DATA_RAM_DEPTH); + + typedef logic [ HPDCACHE_WORD_WIDTH-1:0] hpdcache_data_word_t; + typedef logic [ HPDCACHE_WORD_WIDTH/8-1:0] hpdcache_data_be_t; + typedef logic [ $clog2(HPDCACHE_DATA_RAM_Y_CUTS)-1:0] hpdcache_data_ram_row_idx_t; + typedef logic [ $clog2(HPDCACHE_DATA_WAYS_PER_RAM_WORD)-1:0] hpdcache_data_ram_way_idx_t; + + typedef logic [HPDCACHE_DATA_RAM_ADDR_WIDTH-1:0] hpdcache_data_ram_addr_t; + typedef hpdcache_data_word_t[HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_data_t; + typedef hpdcache_data_be_t [HPDCACHE_DATA_WAYS_PER_RAM_WORD-1:0] hpdcache_data_ram_be_t; + + typedef hpdcache_data_ram_data_t + [HPDCACHE_DATA_RAM_Y_CUTS-1:0] + [HPDCACHE_DATA_RAM_X_CUTS-1:0] + hpdcache_data_entry_t; + + typedef hpdcache_data_ram_be_t + [HPDCACHE_DATA_RAM_Y_CUTS-1:0] + [HPDCACHE_DATA_RAM_X_CUTS-1:0] + hpdcache_data_be_entry_t; + + typedef logic + [HPDCACHE_DATA_RAM_X_CUTS-1:0] + hpdcache_data_row_enable_t; + + typedef hpdcache_data_row_enable_t + [HPDCACHE_DATA_RAM_Y_CUTS-1:0] + hpdcache_data_enable_t; + + typedef hpdcache_data_ram_addr_t + [HPDCACHE_DATA_RAM_Y_CUTS-1:0] + [HPDCACHE_DATA_RAM_X_CUTS-1:0] + hpdcache_data_addr_t; + // }}} + + // Definition of interface with miss handler + // {{{ + localparam int unsigned HPDCACHE_REFILL_DATA_WIDTH = HPDCACHE_DATA_RAM_ACCESS_WIDTH; + + typedef hpdcache_data_word_t[HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_data_t; + typedef hpdcache_data_be_t [HPDCACHE_ACCESS_WORDS-1:0] hpdcache_refill_be_t; + // }}} + + // Definition of interface with requesters + // {{{ + localparam int unsigned HPDCACHE_REQ_DATA_WIDTH = HPDCACHE_REQ_WORDS*HPDCACHE_WORD_WIDTH; + localparam int unsigned HPDCACHE_REQ_DATA_BYTES = HPDCACHE_REQ_DATA_WIDTH/8; + localparam int unsigned HPDCACHE_REQ_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_WORDS); + localparam int unsigned HPDCACHE_REQ_BYTE_OFFSET_WIDTH = $clog2(HPDCACHE_REQ_DATA_BYTES); + localparam int unsigned HPDCACHE_REQ_OFFSET_WIDTH = HPDCACHE_PA_WIDTH - HPDCACHE_TAG_WIDTH; + + typedef logic [HPDCACHE_PA_WIDTH-1:0] hpdcache_req_addr_t; + typedef logic [HPDCACHE_REQ_OFFSET_WIDTH-1:0] hpdcache_req_offset_t; + typedef hpdcache_data_word_t [HPDCACHE_REQ_WORDS-1:0] hpdcache_req_data_t; + typedef hpdcache_data_be_t 
[HPDCACHE_REQ_WORDS-1:0] hpdcache_req_be_t; + typedef logic [2:0] hpdcache_req_size_t; + typedef logic [HPDCACHE_REQ_SRC_ID_WIDTH-1:0] hpdcache_req_sid_t; + typedef logic [HPDCACHE_REQ_TRANS_ID_WIDTH-1:0] hpdcache_req_tid_t; + + // Definition of operation codes + // {{{ + typedef enum logic [3:0] { + HPDCACHE_REQ_LOAD = 4'h0, + HPDCACHE_REQ_STORE = 4'h1, + // RESERVED = 4'h2, + // RESERVED = 4'h3, + HPDCACHE_REQ_AMO_LR = 4'h4, + HPDCACHE_REQ_AMO_SC = 4'h5, + HPDCACHE_REQ_AMO_SWAP = 4'h6, + HPDCACHE_REQ_AMO_ADD = 4'h7, + HPDCACHE_REQ_AMO_AND = 4'h8, + HPDCACHE_REQ_AMO_OR = 4'h9, + HPDCACHE_REQ_AMO_XOR = 4'ha, + HPDCACHE_REQ_AMO_MAX = 4'hb, + HPDCACHE_REQ_AMO_MAXU = 4'hc, + HPDCACHE_REQ_AMO_MIN = 4'hd, + HPDCACHE_REQ_AMO_MINU = 4'he, + HPDCACHE_REQ_CMO = 4'hf + } hpdcache_req_op_t; + // }}} + + // Definition of CMO codes + // {{{ + typedef enum hpdcache_req_size_t { + HPDCACHE_REQ_CMO_FENCE = 3'h0, + // RESERVED = 3'h1, + HPDCACHE_REQ_CMO_INVAL_NLINE = 3'h2, + HPDCACHE_REQ_CMO_INVAL_SET_WAY = 3'h3, + HPDCACHE_REQ_CMO_INVAL_ALL = 3'h4, + HPDCACHE_REQ_CMO_PREFETCH = 3'h5 + } hpdcache_req_cmo_t; + // }}} + + // Definition of PMA flags + // {{{ + typedef struct packed + { + logic uncacheable; + logic io; // FIXME: for future use + } hpdcache_pma_t; + // }}} + + // Definition of interfaces + // {{{ + // Request Interface + typedef struct packed + { + hpdcache_req_offset_t addr_offset; + hpdcache_req_data_t wdata; + hpdcache_req_op_t op; + hpdcache_req_be_t be; + hpdcache_req_size_t size; + hpdcache_req_sid_t sid; + hpdcache_req_tid_t tid; + logic need_rsp; + + // only valid in case of physically indexed requests + logic phys_indexed; + hpdcache_tag_t addr_tag; + hpdcache_pma_t pma; + } hpdcache_req_t; + + // Response Interface + typedef struct packed + { + hpdcache_req_data_t rdata; + hpdcache_req_sid_t sid; + hpdcache_req_tid_t tid; + logic error; + logic aborted; + } hpdcache_rsp_t; + // }}} + + // Definition of functions + // {{{ + function automatic logic is_load(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_LOAD: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_store(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_STORE: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_LR, + HPDCACHE_REQ_AMO_SC, + HPDCACHE_REQ_AMO_SWAP, + HPDCACHE_REQ_AMO_ADD, + HPDCACHE_REQ_AMO_AND, + HPDCACHE_REQ_AMO_OR, + HPDCACHE_REQ_AMO_XOR, + HPDCACHE_REQ_AMO_MAX, + HPDCACHE_REQ_AMO_MAXU, + HPDCACHE_REQ_AMO_MIN, + HPDCACHE_REQ_AMO_MINU: + return 1'b1; + default: + return 1'b0; + endcase + endfunction + + function automatic logic is_amo_lr(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_LR: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_sc(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_SC: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_swap(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_SWAP: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_add(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_ADD: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_and(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_AND: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic 
is_amo_or(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_OR: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_xor(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_XOR: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_max(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_MAX: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_maxu(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_MAXU: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_min(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_MIN: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_amo_minu(input hpdcache_req_op_t op); + case (op) + HPDCACHE_REQ_AMO_MINU: return 1'b1; + default: return 1'b0; + endcase + endfunction + + function automatic logic is_cmo_inval( + input hpdcache_req_op_t op, + input hpdcache_req_size_t sz); + case (op) + HPDCACHE_REQ_CMO: + case (sz) + HPDCACHE_REQ_CMO_INVAL_NLINE, + HPDCACHE_REQ_CMO_INVAL_SET_WAY, + HPDCACHE_REQ_CMO_INVAL_ALL: begin + return 1'b1; + end + default: begin + return 1'b0; + end + endcase + default: begin + return 1'b0; + end + endcase + endfunction + + function automatic logic is_cmo_inval_by_nline(input hpdcache_req_size_t sz); + return (sz == HPDCACHE_REQ_CMO_INVAL_NLINE); + endfunction + + function automatic logic is_cmo_inval_by_set(input hpdcache_req_size_t sz); + return (sz == HPDCACHE_REQ_CMO_INVAL_SET_WAY); + endfunction + + function automatic logic is_cmo_inval_all(input hpdcache_req_size_t sz); + return (sz == HPDCACHE_REQ_CMO_INVAL_ALL); + endfunction + + function automatic logic is_cmo_fence( + input hpdcache_req_op_t op, + input hpdcache_req_size_t sz); + case (op) + HPDCACHE_REQ_CMO: begin + return (sz == HPDCACHE_REQ_CMO_FENCE); + end + default: begin + return 1'b0; + end + endcase + endfunction + + function automatic logic is_cmo_prefetch( + input hpdcache_req_op_t op, + input hpdcache_req_size_t sz); + case (op) + HPDCACHE_REQ_CMO: begin + return (sz == HPDCACHE_REQ_CMO_PREFETCH); + end + default: begin + return 1'b0; + end + endcase + endfunction + + function automatic hpdcache_tag_t hpdcache_get_req_addr_tag(input hpdcache_req_addr_t addr); + return addr[(HPDCACHE_OFFSET_WIDTH + HPDCACHE_SET_WIDTH) +: HPDCACHE_TAG_WIDTH]; + endfunction + + function automatic hpdcache_set_t hpdcache_get_req_addr_set(input hpdcache_req_addr_t addr); + return addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH]; + endfunction + + function automatic hpdcache_word_t hpdcache_get_req_addr_word(input hpdcache_req_addr_t addr); + return addr[$clog2(HPDCACHE_WORD_WIDTH/8) +: HPDCACHE_WORD_IDX_WIDTH]; + endfunction + + function automatic hpdcache_offset_t hpdcache_get_req_addr_offset(input hpdcache_req_addr_t addr); + return addr[0 +: HPDCACHE_OFFSET_WIDTH]; + endfunction + + function automatic hpdcache_nline_t hpdcache_get_req_addr_nline(input hpdcache_req_addr_t addr); + return addr[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_NLINE_WIDTH]; + endfunction + + function automatic hpdcache_set_t hpdcache_get_req_offset_set(input hpdcache_req_offset_t offset); + return offset[HPDCACHE_OFFSET_WIDTH +: HPDCACHE_SET_WIDTH]; + endfunction + + function automatic hpdcache_word_t hpdcache_get_req_offset_word(input hpdcache_req_offset_t offset); + return offset[$clog2(HPDCACHE_WORD_WIDTH/8) +: HPDCACHE_WORD_IDX_WIDTH]; + endfunction + + // 
}}} + // }}} + + // Definition of constants and types for the Miss Status Holding Register (MSHR) + // {{{ + + // HPDcache MSHR number of sets + localparam int unsigned HPDCACHE_MSHR_SETS = + hpdcache_params_pkg::PARAM_MSHR_SETS; + + // HPDcache MSHR number of ways + localparam int unsigned HPDCACHE_MSHR_WAYS = + hpdcache_params_pkg::PARAM_MSHR_WAYS; + + // HPDcache MSHR number of ways in the same SRAM word + localparam int unsigned HPDCACHE_MSHR_WAYS_PER_RAM_WORD = + hpdcache_params_pkg::PARAM_MSHR_WAYS_PER_RAM_WORD; /* FIXME this parameter is currently ignored */ + + // HPDcache MSHR number of sets in the same SRAM + localparam int unsigned HPDCACHE_MSHR_SETS_PER_RAM = + hpdcache_params_pkg::PARAM_MSHR_SETS_PER_RAM; /* FIXME this parameter is currently ignored */ + + // HPDcache MSHR implements write byte enable + localparam bit HPDCACHE_MSHR_RAM_WBYTEENABLE = + hpdcache_params_pkg::PARAM_MSHR_RAM_WBYTEENABLE; + localparam bit HPDCACHE_MSHR_USE_REGBANK = + hpdcache_params_pkg::PARAM_MSHR_USE_REGBANK; + + localparam int unsigned HPDCACHE_MSHR_SET_WIDTH = $clog2(HPDCACHE_MSHR_SETS); + localparam int unsigned HPDCACHE_MSHR_WAY_WIDTH = $clog2(HPDCACHE_MSHR_WAYS); + localparam int unsigned HPDCACHE_MSHR_TAG_WIDTH = HPDCACHE_NLINE_WIDTH - HPDCACHE_MSHR_SET_WIDTH; + + typedef logic unsigned [HPDCACHE_MSHR_SET_WIDTH-1:0] mshr_set_t; + typedef logic unsigned [HPDCACHE_MSHR_TAG_WIDTH-1:0] mshr_tag_t; + typedef logic unsigned [HPDCACHE_MSHR_WAY_WIDTH-1:0] mshr_way_t; + // }}} + + // Definition of interface with memory + // {{{ + typedef logic [7:0] hpdcache_mem_len_t; + typedef logic [2:0] hpdcache_mem_size_t; + + typedef enum logic [1:0] { + HPDCACHE_MEM_RESP_OK = 2'b00, + HPDCACHE_MEM_RESP_NOK = 2'b01 + } hpdcache_mem_error_e; + + typedef enum logic [1:0] { + HPDCACHE_MEM_READ = 2'b00, + HPDCACHE_MEM_WRITE = 2'b01, + HPDCACHE_MEM_ATOMIC = 2'b10 + // Reserved = 2'b11 - TODO: CMO ? 
+ } hpdcache_mem_command_e; + + typedef enum logic [3:0] { + HPDCACHE_MEM_ATOMIC_ADD = 4'b0000, + HPDCACHE_MEM_ATOMIC_CLR = 4'b0001, + HPDCACHE_MEM_ATOMIC_SET = 4'b0010, + HPDCACHE_MEM_ATOMIC_EOR = 4'b0011, + HPDCACHE_MEM_ATOMIC_SMAX = 4'b0100, + HPDCACHE_MEM_ATOMIC_SMIN = 4'b0101, + HPDCACHE_MEM_ATOMIC_UMAX = 4'b0110, + HPDCACHE_MEM_ATOMIC_UMIN = 4'b0111, + HPDCACHE_MEM_ATOMIC_SWAP = 4'b1000, + // Reserved = 4'b1001, + // Reserved = 4'b1010, + // Reserved = 4'b1011, + HPDCACHE_MEM_ATOMIC_LDEX = 4'b1100, + HPDCACHE_MEM_ATOMIC_STEX = 4'b1101 + // Reserved = 4'b1110, + // Reserved = 4'b1111 + } hpdcache_mem_atomic_e; + + function automatic hpdcache_mem_size_t get_hpdcache_mem_size(int unsigned bytes); + if (bytes == 0) return 0; + else if (bytes <= 2) return 1; + else if (bytes <= 4) return 2; + else if (bytes <= 8) return 3; + else if (bytes <= 16) return 4; + else if (bytes <= 32) return 5; + else if (bytes <= 64) return 6; + else if (bytes <= 128) return 7; + // pragma translate_off + else $error("hpdcache: unsupported number of bytes"); + // pragma translate_on + endfunction + // }}} + + // Definition of constants and types for the Write Buffer (WBUF) + // {{{ + localparam int unsigned HPDCACHE_WBUF_DIR_ENTRIES = + hpdcache_params_pkg::PARAM_WBUF_DIR_ENTRIES; + + localparam int unsigned HPDCACHE_WBUF_DATA_ENTRIES = + hpdcache_params_pkg::PARAM_WBUF_DATA_ENTRIES; + + localparam int unsigned HPDCACHE_WBUF_WORDS = + hpdcache_params_pkg::PARAM_WBUF_WORDS; + + localparam int unsigned HPDCACHE_WBUF_TIMECNT_WIDTH = + hpdcache_params_pkg::PARAM_WBUF_TIMECNT_WIDTH; + + localparam int unsigned HPDCACHE_WBUF_DATA_WIDTH = HPDCACHE_REQ_DATA_WIDTH* + HPDCACHE_WBUF_WORDS; + localparam int unsigned HPDCACHE_WBUF_DATA_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DATA_ENTRIES); + localparam int unsigned HPDCACHE_WBUF_DIR_PTR_WIDTH = $clog2(HPDCACHE_WBUF_DIR_ENTRIES); + + typedef hpdcache_req_addr_t wbuf_addr_t; + typedef hpdcache_nline_t wbuf_match_t; + typedef hpdcache_req_data_t wbuf_data_t; + typedef hpdcache_req_be_t wbuf_be_t; + typedef wbuf_data_t[HPDCACHE_WBUF_WORDS-1:0] wbuf_data_buf_t; + typedef wbuf_be_t [HPDCACHE_WBUF_WORDS-1:0] wbuf_be_buf_t; + typedef logic unsigned [ HPDCACHE_WBUF_TIMECNT_WIDTH-1:0] wbuf_timecnt_t; + typedef logic unsigned [ HPDCACHE_WBUF_DIR_PTR_WIDTH-1:0] wbuf_dir_ptr_t; + typedef logic unsigned [HPDCACHE_WBUF_DATA_PTR_WIDTH-1:0] wbuf_data_ptr_t; + // }}} + + // Definition of constants and types for the Replay Table (RTAB) + // {{{ + localparam int HPDCACHE_RTAB_ENTRIES = hpdcache_params_pkg::PARAM_RTAB_ENTRIES; + + typedef logic [$clog2(HPDCACHE_RTAB_ENTRIES)-1:0] rtab_ptr_t; + // }}} + + // Definition of constants and types for the uncacheable request handler (UC) + // {{{ + typedef struct packed { + logic is_ld; + logic is_st; + logic is_amo_lr; + logic is_amo_sc; + logic is_amo_swap; + logic is_amo_add; + logic is_amo_and; + logic is_amo_or; + logic is_amo_xor; + logic is_amo_max; + logic is_amo_maxu; + logic is_amo_min; + logic is_amo_minu; + } hpdcache_uc_op_t; + // }}} + + // Definition of constants and types for the CMO request handler (CMOH) + // {{{ + typedef struct packed { + logic is_inval_by_nline; + logic is_inval_by_set; + logic is_inval_all; + logic is_fence; + } hpdcache_cmoh_op_t; + // }}} +endpackage diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_plru.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_plru.sv new file mode 100644 index 00000000000..7697737d684 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_plru.sv @@ -0,0 +1,138 @@ 
+/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : May, 2021 + * Description : HPDcache Pseudo-LRU replacement policy + * History : + */ +module hpdcache_plru + // Parameters + // {{{ +#( + parameter int unsigned SETS = 0, + parameter int unsigned WAYS = 0, + + localparam type set_t = logic [$clog2(SETS)-1:0], + localparam type way_vector_t = logic [WAYS-1:0] +) + // }}} + + // Ports + // {{{ +( + input logic clk_i, + input logic rst_ni, + + // PLRU update interface + input logic updt_i, + input set_t updt_set_i, + input way_vector_t updt_way_i, + + // Victim replacement interface + input logic repl_i, + input set_t repl_set_i, + input way_vector_t repl_dir_valid_i, + input logic repl_updt_plru_i, + + output way_vector_t victim_way_o +); + // }}} + + // Internal signals and registers + // {{{ + way_vector_t [SETS-1:0] plru_q, plru_d; + way_vector_t updt_plru; + way_vector_t repl_plru; + way_vector_t used_victim_way, unused_victim_way; + // }}} + + // Victim way selection + // {{{ + hpdcache_prio_1hot_encoder #(.N(WAYS)) + used_victim_select_i ( + .val_i (~plru_q[repl_set_i]), + .val_o (used_victim_way) + ); + + hpdcache_prio_1hot_encoder #(.N(WAYS)) + unused_victim_select_i ( + .val_i (~repl_dir_valid_i), + .val_o (unused_victim_way) + ); + + // If there is a free entry in the directory (valid == 0), choose it as victim + assign victim_way_o = |unused_victim_way ? 
unused_victim_way : used_victim_way; + // }}} + + // Pseudo-LRU update process + // {{{ + assign updt_plru = plru_q[updt_set_i] | updt_way_i; + assign repl_plru = plru_q[repl_set_i] | victim_way_o; + + always_comb + begin : plru_update_comb + plru_d = plru_q; + + case (1'b1) + // When replacing a cache-line, set the PLRU bit of the new line + repl_i: + if (repl_updt_plru_i) begin + // If all PLRU bits of a given would be set, reset them all + // but the currently accessed way + if (&repl_plru) begin + plru_d[repl_set_i] = victim_way_o; + end else begin + plru_d[repl_set_i] = repl_plru; + end + end + + // When accessing a cache-line, set the corresponding PLRU bit + updt_i: + // If all PLRU bits of a given would be set, reset them all + // but the currently accessed way + if (&updt_plru) begin + plru_d[updt_set_i] = updt_way_i; + end else begin + plru_d[updt_set_i] = updt_plru; + end + + default: begin + // do nothing + end + endcase + end + // }}} + + // Set state process + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin : lru_ff + if (!rst_ni) begin + plru_q <= '0; + end else begin + if (updt_i || repl_i) begin + plru_q <= plru_d; + end + end + end + // }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_rtab.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_rtab.sv new file mode 100755 index 00000000000..6df18bc2878 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_rtab.sv @@ -0,0 +1,666 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : September, 2021 + * Description : HPDcache Replay Table + * History : + */ +module hpdcache_rtab +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter type rtab_entry_t = logic +) +// }}} +// Ports +// {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, // RTAB is empty + output logic full_o, // RTAB is full + + // Check RTAB signals + // This interface allows to check if there is an address-overlapping + // request in the RTAB with respect to the given nline. 
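+    // A hit on this check means that an older request targeting the same
+    // cache line is already queued in the table. In that case the new
+    // request is expected to be allocated with alloc_and_link_i (see the
+    // assertions at the end of this module) so that it is appended to the
+    // existing linked list instead of starting a new one.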
+ input logic check_i, // Check for hit (nline) in the RTAB + input hpdcache_nline_t check_nline_i, + output logic check_hit_o, + + // Allocate signals + // This interface allows to allocate a new request in a new linked list + input logic alloc_i, + input logic alloc_and_link_i, + input rtab_entry_t alloc_req_i, + input logic alloc_mshr_hit_i, + input logic alloc_mshr_full_i, + input logic alloc_mshr_ready_i, + input logic alloc_wbuf_hit_i, + input logic alloc_wbuf_not_ready_i, + + // Pop signals + // This interface allows to read (and remove) a request from the RTAB + output logic pop_try_valid_o, // Request ready to be replayed + input logic pop_try_i, + output rtab_entry_t pop_try_req_o, + output rtab_ptr_t pop_try_ptr_o, + + // Pop Commit signals + // This interface allows to actually remove a popped request + input logic pop_commit_i, + input rtab_ptr_t pop_commit_ptr_i, + + // Pop Rollback signals + // This interface allows to put back a popped request + input logic pop_rback_i, + input rtab_ptr_t pop_rback_ptr_i, + input logic pop_rback_mshr_hit_i, + input logic pop_rback_mshr_full_i, + input logic pop_rback_mshr_ready_i, + input logic pop_rback_wbuf_hit_i, + input logic pop_rback_wbuf_not_ready_i, + + + // Control signals from/to WBUF + output hpdcache_req_addr_t wbuf_addr_o, // address to check against ongoing writes + output logic wbuf_is_read_o, // monitored request is read + input logic wbuf_hit_open_i, // Hit on open entry in the write buf + input logic wbuf_hit_pend_i, // Hit on pend entry in the write buf + input logic wbuf_hit_sent_i, // Hit on sent entry in the write buf + input logic wbuf_not_ready_i, // Write buffer cannot accept the write + + // Control signals from the Miss Handler + input logic miss_ready_i, // Miss Handler is ready + + // Control signals from the Refill Handler + input logic refill_i, // Active refill + input hpdcache_nline_t refill_nline_i, // Cache-line index being refilled + + // Configuration parameters + input logic cfg_single_entry_i // Enable only one entry of the table +); +// }}} + +// Definition of constants, types and functions +// {{{ + localparam int N = HPDCACHE_RTAB_ENTRIES; + + function automatic rtab_ptr_t rtab_bv_to_index( + input logic [N-1:0] bv); + for (int i = 0; i < N; i++) begin + if (bv[i]) return rtab_ptr_t'(i); + end + return 0; + endfunction + + function automatic logic [N-1:0] rtab_index_to_bv( + input rtab_ptr_t index); + logic [N-1:0] bv; + + for (int i = 0; i < N; i++) begin + bv[i] = (rtab_ptr_t'(i) == index); + end + return bv; + endfunction + + function automatic bit rtab_mshr_set_equal( + input hpdcache_nline_t x, + input hpdcache_nline_t y); + return (x[0 +: HPDCACHE_MSHR_SET_WIDTH] == y[0 +: HPDCACHE_MSHR_SET_WIDTH]); + endfunction + + function automatic logic [N-1:0] rtab_next(rtab_ptr_t [N-1:0] next, rtab_ptr_t x); + return rtab_index_to_bv(next[x]); + endfunction + + typedef enum { + POP_TRY_HEAD, + POP_TRY_NEXT, + POP_TRY_NEXT_WAIT + } rtab_pop_try_state_e; +// }}} + +// Internal signals and registers +// {{{ + rtab_entry_t [N-1:0] req_q; + rtab_ptr_t [N-1:0] next_q; + + rtab_pop_try_state_e pop_try_state_q, pop_try_state_d; + logic [N-1:0] pop_try_next_q, pop_try_next_d; + + logic [N-1:0] valid_q; + logic [N-1:0] valid_set, valid_rst; + logic [N-1:0] alloc_valid_set; + logic [N-1:0] pop_commit_valid_rst; + + // Bits indicating if the corresponding entry is the head of a linked list + logic [N-1:0] head_q; + logic [N-1:0] head_set, head_rst; + logic [N-1:0] alloc_head_set, alloc_head_rst; + logic [N-1:0] 
pop_try_head_rst; + logic [N-1:0] pop_commit_head_set; + logic [N-1:0] pop_rback_head_set; + + // Bits indicating if the corresponding entry is the tail of a linked list + logic [N-1:0] tail_q; + logic [N-1:0] tail_set, tail_rst; + logic [N-1:0] alloc_tail_set, alloc_tail_rst; + + // There is a pend ing miss on the target nline + logic [N-1:0] deps_mshr_hit_q; + logic [N-1:0] deps_mshr_hit_set, deps_mshr_hit_rst; + logic [N-1:0] alloc_deps_mshr_hit_set; + logic [N-1:0] pop_rback_deps_mshr_hit_set; + + // The MSHR has no available slot for the new miss + logic [N-1:0] deps_mshr_full_q; + logic [N-1:0] deps_mshr_full_set, deps_mshr_full_rst; + logic [N-1:0] alloc_deps_mshr_full_set; + logic [N-1:0] pop_rback_deps_mshr_full_set; + + // The MSHR is not ready to send a new miss requests + logic [N-1:0] deps_mshr_ready_q; + logic [N-1:0] deps_mshr_ready_set, deps_mshr_ready_rst; + logic [N-1:0] alloc_deps_mshr_ready_set; + logic [N-1:0] pop_rback_deps_mshr_ready_set; + + // Hit on an non-e mpty entry of the write buffer + logic [N-1:0] deps_wbuf_hit_q; + logic [N-1:0] deps_wbuf_hit_set, deps_wbuf_hit_rst; + logic [N-1:0] alloc_deps_wbuf_hit_set; + logic [N-1:0] pop_rback_deps_wbuf_hit_set; + + // Hit on a pend entry of the write buffer + logic [N-1:0] deps_wbuf_not_ready_q; + logic [N-1:0] deps_wbuf_not_ready_set, deps_wbuf_not_ready_rst; + logic [N-1:0] alloc_deps_wbuf_not_ready_set; + logic [N-1:0] pop_rback_deps_wbuf_not_ready_set; + + logic [N-1:0] nodeps; + hpdcache_nline_t [N-1:0] nline; + hpdcache_req_addr_t [N-1:0] addr; + logic [N-1:0] is_read; + logic [N-1:0] check_hit; + logic [N-1:0] match_check_nline; + logic [N-1:0] match_check_tail; + logic [N-1:0] match_refill_nline; + logic [N-1:0] match_refill_mshr_set; + + logic [N-1:0] free; + logic [N-1:0] free_alloc; + logic alloc; + + logic [N-1:0] pop_match_next; + logic [N-1:0] pop_rback_ptr_bv; + logic [N-1:0] pop_try_bv; + logic [N-1:0] ready; + + genvar gen_i; +// }}} + +// Compute global control signals +// {{{ + // compute if entries are ready to be replayed + assign nodeps = ~(deps_mshr_hit_q | + deps_mshr_full_q | + deps_mshr_ready_q | + deps_wbuf_hit_q | + deps_wbuf_not_ready_q); + + assign ready = valid_q & head_q & nodeps; + + assign free = ~valid_q; + + // compute the free vector (one-hot signal) + hpdcache_prio_1hot_encoder #( + .N (N) + ) free_encoder_i ( + .val_i (free), + .val_o (free_alloc) + ); + + // full and empty signals + assign empty_o = &(~valid_q); + assign full_o = &( valid_q) | (|valid_q & cfg_single_entry_i); +// }}} + +// Check interface +// {{{ + generate + for (gen_i = 0; gen_i < N; gen_i++) begin : check_gen + assign addr[gen_i] = {req_q[gen_i].addr_tag, req_q[gen_i].addr_offset}, + nline[gen_i] = hpdcache_get_req_addr_nline(addr[gen_i]), + match_check_nline[gen_i] = (check_nline_i == nline[gen_i]); + + assign is_read[gen_i] = is_load(req_q[gen_i].op) | + is_cmo_prefetch(req_q[gen_i].op, req_q[gen_i].size); + end + endgenerate + + assign check_hit = valid_q & match_check_nline, + check_hit_o = |check_hit, + match_check_tail = check_hit & tail_q; +// }}} + +// Allocation process +// {{{ + assign alloc = alloc_i | alloc_and_link_i; + + // Set the valid bit-vector of the replay table + assign alloc_valid_set = free_alloc & {N{alloc}}; + + // Set of head and tail bit-vectors during an allocation + // - The head bit is only set when creating a new linked-list + // - The tail bit is always set because new requests are added on the tail. 
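+    // Note: requests that conflict on the same cache line are chained into
+    // a single linked list through next_q. Only the head of each list, and
+    // only when it has no pending dependencies, is eligible for replay
+    // (see the "ready" computation above).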
+ assign alloc_head_set = free_alloc & {N{alloc_i}}, + alloc_tail_set = alloc_valid_set; + + // Reset of head and tail bit-vectors during an allocation + // - When doing an allocation and link, head bit shall be reset + // - when doing an allocation and link, the "prev" tail shall be reset + assign alloc_head_rst = free_alloc & {N{alloc_and_link_i}}, + alloc_tail_rst = match_check_tail & {N{alloc_and_link_i}}; + + // Set the dependency bits for the allocated entry + assign alloc_deps_mshr_hit_set = alloc_valid_set & {N{ alloc_mshr_hit_i}}, + alloc_deps_mshr_full_set = alloc_valid_set & {N{ alloc_mshr_full_i}}, + alloc_deps_mshr_ready_set = alloc_valid_set & {N{ alloc_mshr_ready_i}}, + alloc_deps_wbuf_hit_set = alloc_valid_set & {N{ alloc_wbuf_hit_i}}, + alloc_deps_wbuf_not_ready_set = alloc_valid_set & {N{alloc_wbuf_not_ready_i}}; +// }}} + +// Update replay table dependencies +// {{{ + // Update write buffer hit dependencies + // {{{ + // Build a bit-vector with HEAD requests waiting for a conflict in the wbuf + logic [N-1:0] wbuf_rd_pending, wbuf_wr_pending; + logic [N-1:0] wbuf_rd_gnt, wbuf_wr_gnt; + logic [ 1:0] wbuf_pending; + logic [ 1:0] wbuf_gnt; + logic wbuf_ready; + logic [N-1:0] wbuf_sel; + + assign wbuf_rd_pending = valid_q & head_q & deps_wbuf_hit_q, + wbuf_wr_pending = valid_q & head_q & deps_wbuf_not_ready_q; + + // Choose in a round-robin manner a ready transaction waiting for a conflict in the wbuf + hpdcache_rrarb #( + .N (N) + ) wbuf_rd_pending_arb_i ( + .clk_i, + .rst_ni, + .req_i (wbuf_rd_pending), + .gnt_o (wbuf_rd_gnt), + .ready_i (wbuf_gnt[0] & wbuf_ready) + ); + + hpdcache_rrarb #( + .N (N) + ) wbuf_wr_pending_arb_i ( + .clk_i, + .rst_ni, + .req_i (wbuf_wr_pending), + .gnt_o (wbuf_wr_gnt), + .ready_i (wbuf_gnt[1] & wbuf_ready) + ); + + assign wbuf_pending = {|wbuf_wr_gnt, |wbuf_rd_gnt}, + wbuf_ready = |(pop_try_bv & (wbuf_rd_gnt | wbuf_wr_gnt)); + + hpdcache_fxarb #( + .N (2) + ) wbuf_pending_arb_i ( + .clk_i, + .rst_ni, + .req_i (wbuf_pending), + .gnt_o (wbuf_gnt), + .ready_i (wbuf_ready) + ); + + assign wbuf_sel = wbuf_gnt[0] ? wbuf_rd_gnt : + wbuf_gnt[1] ? 
wbuf_wr_gnt : '0; + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH ($bits(hpdcache_req_addr_t)), + .ONE_HOT_SEL (1'b1) + ) wbuf_pending_addr_mux_i ( + .data_i (addr), + .sel_i (wbuf_sel), + .data_o (wbuf_addr_o) + ); + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH (1), + .ONE_HOT_SEL (1'b1) + ) wbuf_pending_is_read_mux_i ( + .data_i (is_read), + .sel_i (wbuf_sel), + .data_o (wbuf_is_read_o) + ); + + // reset write buffer dependency bits with the output from the write buffer + assign deps_wbuf_hit_rst = + wbuf_sel & ~{N{wbuf_hit_open_i | wbuf_hit_pend_i | wbuf_hit_sent_i}}; + assign deps_wbuf_not_ready_rst = + wbuf_sel & ~{N{wbuf_not_ready_i}}; + // }}} + + // Update miss handler dependency + // {{{ + assign deps_mshr_ready_rst = {N{miss_ready_i}}; + // }}} + + // Update refill dependencies + // {{{ + generate + for (gen_i = 0; gen_i < N; gen_i++) begin : match_refill_gen + assign match_refill_mshr_set[gen_i] = + rtab_mshr_set_equal(refill_nline_i, nline[gen_i]); + assign match_refill_nline[gen_i] = + (refill_nline_i == nline[gen_i]); + end + endgenerate + + assign deps_mshr_full_rst = {N{refill_i}} & match_refill_mshr_set; + assign deps_mshr_hit_rst = {N{refill_i}} & match_refill_nline; + // }}} +// }}} + +// Pop interface +// {{{ + logic [N-1:0] pop_sel; + logic [N-1:0] pop_commit_bv; + + assign pop_commit_bv = rtab_index_to_bv(pop_commit_ptr_i); + + // Pop try process + // {{{ + logic [N-1:0] pop_gnt; + logic pop_head; + + hpdcache_rrarb #( + .N (N) + ) pop_arb_i ( + .clk_i, + .rst_ni, + .req_i (ready), + .gnt_o (pop_gnt), + .ready_i (pop_head) + ); + + always_comb + begin : req_valid_comb + case(pop_try_state_q) + POP_TRY_HEAD : pop_try_valid_o = |ready; + POP_TRY_NEXT : pop_try_valid_o = 1'b1; + POP_TRY_NEXT_WAIT: pop_try_valid_o = 1'b1; + default : pop_try_valid_o = 1'b0; + endcase + end + + always_comb + begin : pop_entry_sel_comb + pop_try_state_d = pop_try_state_q; + pop_try_next_d = pop_try_next_q; + pop_head = 1'b0; + pop_sel = '0; + + case (pop_try_state_q) + POP_TRY_HEAD: begin + // This FSM may be in this state after forwarding the tail of + // a list. In that case, a rollback may arrive in this cycle. + pop_sel = pop_gnt; + if (!pop_rback_i && pop_try_valid_o) begin + if (pop_try_i) begin + // If the request interface accepts the request, go to the next request + // in the list (if the current request is not the tail). Otherwise, stay in + // the same state to to forward a request from a new list + pop_head = 1'b1; + if ((pop_gnt & ~tail_q) != 0) begin + pop_try_state_d = POP_TRY_NEXT; + pop_try_next_d = rtab_next(next_q, pop_try_ptr_o); + end + end + end + end + POP_TRY_NEXT: begin + pop_sel = pop_try_next_q; + if (pop_rback_i) begin + pop_try_state_d = POP_TRY_HEAD; + end else begin + if (pop_try_i) begin + // If the request interface accepts the new request, go to the next request + // in the list (if the current request is not the tail). Otherwise, return + // to the POP_TRY_HEAD state to forward a request from a new list + if ((pop_try_next_q & ~tail_q) != 0) begin + pop_try_state_d = POP_TRY_NEXT; + pop_try_next_d = rtab_next(next_q, pop_try_ptr_o); + end else begin + pop_try_state_d = POP_TRY_HEAD; + end + end else begin + // If the request interface is not ready to consume the new request, wait + // until it is + pop_try_state_d = POP_TRY_NEXT_WAIT; + end + end + end + POP_TRY_NEXT_WAIT: begin + // Wait for the current request to be accepted. 
Then go to the next request in the + // list or to a new list + pop_sel = pop_try_next_q; + if (pop_try_i) begin + if ((pop_try_next_q & ~tail_q) != 0) begin + pop_try_state_d = POP_TRY_NEXT; + pop_try_next_d = rtab_next(next_q, pop_try_ptr_o); + end else begin + pop_try_state_d = POP_TRY_HEAD; + end + end + end + default: begin + end + endcase + end + + assign pop_commit_head_set = '0; + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH ($bits(rtab_entry_t)), + .ONE_HOT_SEL (1'b1) + ) pop_mux_i ( + .data_i (req_q), + .sel_i (pop_sel), + .data_o (pop_try_req_o) + ); + + // Temporarily unset the head bit of the popped request to prevent it to be rescheduled + assign pop_try_bv = pop_sel & {N{pop_try_i}}, + pop_try_head_rst = pop_try_bv; + + + // Forward the index of the entry being popped. This is used later by the + // commit or rollback operations + assign pop_try_ptr_o = rtab_bv_to_index(pop_sel); + + // }}} + + // Pop commit process + // {{{ + // Invalidate the entry being popped (head of the linked list) + assign pop_commit_valid_rst = {N{pop_commit_i}} & rtab_index_to_bv(pop_commit_ptr_i); + // }}} + + // Pop rollback process + // {{{ + // Set again the head bit of the rolled-back request + assign pop_rback_ptr_bv = rtab_index_to_bv(pop_rback_ptr_i); + + assign pop_rback_head_set = {N{pop_rback_i}} & pop_rback_ptr_bv; + + assign pop_rback_deps_mshr_hit_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_hit_i}}, + pop_rback_deps_mshr_full_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_full_i}}, + pop_rback_deps_mshr_ready_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_mshr_ready_i}}, + pop_rback_deps_wbuf_hit_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_wbuf_hit_i}}, + pop_rback_deps_wbuf_not_ready_set = {N{pop_rback_i}} & pop_rback_ptr_bv & {N{pop_rback_wbuf_not_ready_i}}; + // }}} +// }}} + +// Internal state assignment +// {{{ + assign head_set = alloc_head_set | pop_commit_head_set | pop_rback_head_set, + head_rst = alloc_head_rst | pop_try_head_rst; + + assign tail_set = alloc_tail_set, + tail_rst = alloc_tail_rst; + + assign valid_set = alloc_valid_set, + valid_rst = pop_commit_valid_rst; + + assign deps_mshr_hit_set = alloc_deps_mshr_hit_set | pop_rback_deps_mshr_hit_set, + deps_mshr_full_set = alloc_deps_mshr_full_set | pop_rback_deps_mshr_full_set, + deps_mshr_ready_set = alloc_deps_mshr_ready_set | pop_rback_deps_mshr_ready_set, + deps_wbuf_hit_set = alloc_deps_wbuf_hit_set | pop_rback_deps_wbuf_hit_set, + deps_wbuf_not_ready_set = alloc_deps_wbuf_not_ready_set | pop_rback_deps_wbuf_not_ready_set; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : rtab_valid_ff + if (!rst_ni) begin + valid_q <= '0; + head_q <= '0; + tail_q <= '0; + deps_mshr_hit_q <= '0; + deps_mshr_full_q <= '0; + deps_mshr_ready_q <= '0; + deps_wbuf_hit_q <= '0; + deps_wbuf_not_ready_q <= '0; + next_q <= '0; + end else begin + valid_q <= (~valid_q & valid_set) | + ( valid_q & ~valid_rst); + + // update head and tail flags + head_q <= (~head_q & head_set) | + ( head_q & ~head_rst); + + tail_q <= (~tail_q & tail_set) | + ( tail_q & ~tail_rst); + + // update dependency flags + deps_mshr_hit_q <= (~deps_mshr_hit_q & deps_mshr_hit_set) | + ( deps_mshr_hit_q & ~deps_mshr_hit_rst); + deps_mshr_full_q <= (~deps_mshr_full_q & deps_mshr_full_set) | + ( deps_mshr_full_q & ~deps_mshr_full_rst); + deps_mshr_ready_q <= (~deps_mshr_ready_q & deps_mshr_ready_set) | + ( deps_mshr_ready_q & ~deps_mshr_ready_rst); + deps_wbuf_hit_q <= (~deps_wbuf_hit_q & deps_wbuf_hit_set) 
| + ( deps_wbuf_hit_q & ~deps_wbuf_hit_rst); + deps_wbuf_not_ready_q <= (~deps_wbuf_not_ready_q & deps_wbuf_not_ready_set) | + ( deps_wbuf_not_ready_q & ~deps_wbuf_not_ready_rst); + + // update the next pointers + for (int i = 0; i < N; i++) begin + if (alloc_and_link_i && match_check_tail[i]) begin + next_q[i] <= rtab_bv_to_index(free_alloc); + end + end + end + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : pop_try_ff + if (!rst_ni) begin + pop_try_state_q <= POP_TRY_HEAD; + pop_try_next_q <= '0; + end else begin + pop_try_state_q <= pop_try_state_d; + pop_try_next_q <= pop_try_next_d; + end + end + + always_ff @(posedge clk_i) + begin : rtab_ff + for (int i = 0; i < N; i++) begin + // update the request array + if (valid_set[i]) begin + req_q[i] <= alloc_req_i; + end + end + end +// }}} + +// Assertions +// {{{ +// pragma translate_off + assert property (@(posedge clk_i) + check_i |-> $onehot0(match_check_tail)) else + $error("rtab: more than one entry matching"); + + assert property (@(posedge clk_i) + alloc_and_link_i |-> (check_i & check_hit_o)) else + $error("rtab: alloc and link shall be performed in case of check hit"); + + assert property (@(posedge clk_i) + alloc_and_link_i |-> + ({alloc_req_i.addr_tag, hpdcache_get_req_offset_set(alloc_req_i.addr_offset)} == + check_nline_i)) else + $error("rtab: nline for alloc and link shall match the one being checked"); + + assert property (@(posedge clk_i) + alloc_i |-> !alloc_and_link_i) else + $error("rtab: only one allocation per cycle is allowed"); + +`ifndef VERILATOR + assert property (@(posedge clk_i) + pop_try_i |-> ##1 (pop_commit_i | pop_rback_i)) else + $error("rtab: a pop try shall be followed by a commit or rollback"); +`endif + + assert property (@(posedge clk_i) + pop_commit_i |-> valid_q[pop_commit_ptr_i]) else + $error("rtab: commiting an invalid entry"); + + assert property (@(posedge clk_i) + pop_rback_i |-> valid_q[pop_rback_ptr_i]) else + $error("rtab: rolling-back an invalid entry"); + + assert property (@(posedge clk_i) + pop_rback_i |-> !pop_try_i) else + $error("rtab: cache shall not accept a new request while rolling back"); + + assert property (@(posedge clk_i) + alloc |-> ~full_o) else + $error("rtab: trying to allocate while the table is full"); + + assert property (@(posedge clk_i) + alloc_and_link_i |-> ~cfg_single_entry_i) else + $error("rtab: trying to link a request in single entry mode"); +// pragma translate_on +// }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_uncached.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_uncached.sv new file mode 100644 index 00000000000..ba449dc1be2 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_uncached.sv @@ -0,0 +1,965 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : May, 2021 + * Description : HPDcache uncached and AMO request handler + * History : + */ +module hpdcache_uncached +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_r_t = logic, + parameter type hpdcache_mem_resp_w_t = logic, + + localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0] +) + // }}} +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Global control signals + // {{{ + input logic wbuf_empty_i, + input logic mshr_empty_i, + input logic rtab_empty_i, + input logic ctrl_empty_i, + // }}} + + // Cache-side request interface + // {{{ + input logic req_valid_i, + output logic req_ready_o, + input hpdcache_uc_op_t req_op_i, + input hpdcache_req_addr_t req_addr_i, + input hpdcache_req_size_t req_size_i, + input hpdcache_req_data_t req_data_i, + input hpdcache_req_be_t req_be_i, + input logic req_uc_i, + input hpdcache_req_sid_t req_sid_i, + input hpdcache_req_tid_t req_tid_i, + input logic req_need_rsp_i, + // }}} + + // Write buffer interface + // {{{ + output logic wbuf_flush_all_o, + // }}} + + // AMO Cache Interface + // {{{ + output logic dir_amo_match_o, + output hpdcache_set_t dir_amo_match_set_o, + output hpdcache_tag_t dir_amo_match_tag_o, + output logic dir_amo_update_plru_o, + input hpdcache_way_vector_t dir_amo_hit_way_i, + + output logic data_amo_write_o, + output logic data_amo_write_enable_o, + output hpdcache_set_t data_amo_write_set_o, + output hpdcache_req_size_t data_amo_write_size_o, + output hpdcache_word_t data_amo_write_word_o, + output logic [63:0] data_amo_write_data_o, + output logic [7:0] data_amo_write_be_o, + // }}} + + // LR/SC reservation buffer + // {{{ + input logic lrsc_snoop_i, + input hpdcache_req_addr_t lrsc_snoop_addr_i, + input hpdcache_req_size_t lrsc_snoop_size_i, + // }}} + + // Core response interface + // {{{ + input logic core_rsp_ready_i, + output logic core_rsp_valid_o, + output hpdcache_rsp_t core_rsp_o, + // }}} + + // MEMORY interfaces + // {{{ + // Memory request unique identifier + input hpdcache_mem_id_t mem_read_id_i, + input hpdcache_mem_id_t mem_write_id_i, + + // Read interface + input logic mem_req_read_ready_i, + output logic mem_req_read_valid_o, + output hpdcache_mem_req_t mem_req_read_o, + + output logic mem_resp_read_ready_o, + input logic mem_resp_read_valid_i, + input hpdcache_mem_resp_r_t mem_resp_read_i, + + // Write interface + input logic mem_req_write_ready_i, + output logic mem_req_write_valid_o, + output hpdcache_mem_req_t mem_req_write_o, + + input logic mem_req_write_data_ready_i, + output logic mem_req_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_write_data_o, + + output logic mem_resp_write_ready_o, + input logic mem_resp_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_write_i, + // }}} + + // Configuration interface + // {{{ + input logic cfg_error_on_cacheable_amo_i + // }}} +); +// }}} + +// Definition of constants and types +// {{{ + localparam hpdcache_uint MEM_REQ_RATIO = HPDcacheMemDataWidth/HPDCACHE_REQ_DATA_WIDTH; + localparam hpdcache_uint MEM_REQ_WORD_INDEX_WIDTH = $clog2(MEM_REQ_RATIO); + + typedef enum { + UC_IDLE, + UC_WAIT_PENDING, + UC_MEM_REQ, + UC_MEM_W_REQ, + UC_MEM_WDATA_REQ, + UC_MEM_WAIT_RSP, + UC_CORE_RSP, + UC_AMO_READ_DIR, + UC_AMO_WRITE_DATA + } hpdcache_uc_fsm_t; + + 
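    // Illustrative sketch only (this helper is not used by the logic below):
    // the write path treats the memory write-address and write-data channels
    // as independent handshakes, so from UC_MEM_REQ a store or AMO can fall
    // through to three different states depending on which channel is
    // accepted first.
    function automatic hpdcache_uc_fsm_t uc_write_next_state(
        input logic addr_ready, // assumed to mirror mem_req_write_ready_i
        input logic data_ready  // assumed to mirror mem_req_write_data_ready_i
    );
        case ({addr_ready, data_ready})
            2'b11:   return UC_MEM_WAIT_RSP;  // both channels accepted this cycle
            2'b10:   return UC_MEM_WDATA_REQ; // address accepted, data still pending
            2'b01:   return UC_MEM_W_REQ;     // data accepted, address still pending
            default: return UC_MEM_REQ;       // neither accepted, retry next cycle
        endcase
    endfunction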
localparam logic AMO_SC_SUCCESS = 1'b0; + localparam logic AMO_SC_FAILURE = 1'b1; + + function automatic logic [63:0] prepare_amo_data_operand( + input logic [63:0] data_i, + input hpdcache_req_size_t size_i, + input hpdcache_req_addr_t addr_i, + input logic sign_extend_i + ); + // 64-bits AMOs are already aligned, thus do nothing + if (size_i == hpdcache_req_size_t'(3)) begin + return data_i; + end + + // 32-bits AMOs + else begin + if (addr_i[2] == 1'b1) begin + if (sign_extend_i) begin + return {{32{data_i[63]}}, data_i[63:32]}; + end else begin + return {{32{ 1'b0}}, data_i[63:32]}; + end + end else begin + if (sign_extend_i) begin + return {{32{data_i[31]}}, data_i[31: 0]}; + end else begin + return {{32{ 1'b0}}, data_i[31: 0]}; + end + end + end + endfunction; + + function automatic logic [63:0] prepare_amo_data_result( + input logic [63:0] data_i, + input hpdcache_req_size_t size_i + ); + // 64-bits AMOs are already aligned, thus do nothing + if (size_i == hpdcache_req_size_t'(3)) begin + return data_i; + end + + // 32-bits AMOs + else begin + return {2{data_i[31:0]}}; + end + endfunction; + + function automatic logic amo_need_sign_extend(hpdcache_uc_op_t op); + unique case (1'b1) + op.is_amo_add, + op.is_amo_max, + op.is_amo_min: return 1'b1; + default : return 1'b0; + endcase; + endfunction +// }}} + +// Internal signals and registers +// {{{ + hpdcache_uc_fsm_t uc_fsm_q, uc_fsm_d; + hpdcache_uc_op_t req_op_q; + hpdcache_req_addr_t req_addr_q; + hpdcache_req_size_t req_size_q; + hpdcache_req_data_t req_data_q; + hpdcache_req_be_t req_be_q; + logic req_uc_q; + hpdcache_req_sid_t req_sid_q; + hpdcache_req_tid_t req_tid_q; + logic req_need_rsp_q; + + logic uc_sc_retcode_q, uc_sc_retcode_d; + + hpdcache_req_data_t rsp_rdata_q, rsp_rdata_d; + logic rsp_error_set, rsp_error_rst; + logic rsp_error_q; + logic mem_resp_write_valid_q, mem_resp_write_valid_d; + logic mem_resp_read_valid_q, mem_resp_read_valid_d; + + hpdcache_req_data_t mem_req_write_data; + logic [63:0] amo_req_ld_data; + logic [63:0] amo_ld_data; + logic [63:0] amo_req_st_data; + logic [63:0] amo_st_data; + logic [ 7:0] amo_st_be; + logic [63:0] amo_result; +// }}} + +// LR/SC reservation buffer logic +// {{{ + logic lrsc_rsrv_valid_q; + hpdcache_req_addr_t lrsc_rsrv_addr_q, lrsc_rsrv_addr_d; + hpdcache_nline_t lrsc_rsrv_nline; + hpdcache_offset_t lrsc_rsrv_word; + + hpdcache_offset_t lrsc_snoop_words; + hpdcache_nline_t lrsc_snoop_nline; + hpdcache_offset_t lrsc_snoop_base, lrsc_snoop_end; + logic lrsc_snoop_hit; + logic lrsc_snoop_reset; + + hpdcache_nline_t lrsc_uc_nline; + hpdcache_offset_t lrsc_uc_word; + logic lrsc_uc_hit; + logic lrsc_uc_set, lrsc_uc_reset; + + // NOTE: Reservation set for LR instruction is always 8-bytes in this + // implementation. + assign lrsc_rsrv_nline = hpdcache_get_req_addr_nline(lrsc_rsrv_addr_q), + lrsc_rsrv_word = hpdcache_get_req_addr_offset(lrsc_rsrv_addr_q) >> 3; + + // Check hit on LR/SC reservation for snoop port (normal write accesses) + assign lrsc_snoop_words = (lrsc_snoop_size_i < 3) ? 
1 : hpdcache_offset_t'((8'h1 << lrsc_snoop_size_i) >> 3), + lrsc_snoop_nline = hpdcache_get_req_addr_nline(lrsc_snoop_addr_i), + lrsc_snoop_base = hpdcache_get_req_addr_offset(lrsc_snoop_addr_i) >> 3, + lrsc_snoop_end = lrsc_snoop_base + lrsc_snoop_words; + + assign lrsc_snoop_hit = lrsc_rsrv_valid_q & (lrsc_rsrv_nline == lrsc_snoop_nline) & + (lrsc_rsrv_word >= lrsc_snoop_base) & + (lrsc_rsrv_word < lrsc_snoop_end ); + + assign lrsc_snoop_reset = lrsc_snoop_i & lrsc_snoop_hit; + + // Check hit on LR/SC reservation for AMOs and SC + assign lrsc_uc_nline = hpdcache_get_req_addr_nline(req_addr_i), + lrsc_uc_word = hpdcache_get_req_addr_offset(req_addr_i) >> 3; + + assign lrsc_uc_hit = lrsc_rsrv_valid_q & (lrsc_rsrv_nline == lrsc_uc_nline) & + (lrsc_rsrv_word == lrsc_uc_word); +// }}} + +// Uncacheable request FSM +// {{{ + always_comb + begin : uc_fsm_comb + mem_resp_write_valid_d = mem_resp_write_valid_q; + mem_resp_read_valid_d = mem_resp_read_valid_q; + rsp_error_set = 1'b0; + rsp_error_rst = 1'b0; + lrsc_rsrv_addr_d = lrsc_rsrv_addr_q; + uc_sc_retcode_d = uc_sc_retcode_q; + wbuf_flush_all_o = 1'b0; + lrsc_uc_set = 1'b0; + lrsc_uc_reset = 1'b0; + + uc_fsm_d = uc_fsm_q; + + case (uc_fsm_q) + // Wait for a request + // {{{ + UC_IDLE: begin + + if (req_valid_i) begin + wbuf_flush_all_o = 1'b1; + + unique case (1'b1) + req_op_i.is_ld, + req_op_i.is_st: begin + if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + + req_op_i.is_amo_swap, + req_op_i.is_amo_add, + req_op_i.is_amo_and, + req_op_i.is_amo_or, + req_op_i.is_amo_xor, + req_op_i.is_amo_max, + req_op_i.is_amo_maxu, + req_op_i.is_amo_min, + req_op_i.is_amo_minu, + req_op_i.is_amo_lr: begin + // Reset LR/SC reservation if AMO matches its address + lrsc_uc_reset = ~req_op_i.is_amo_lr & lrsc_uc_hit; + + if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin + rsp_error_set = 1'b1; + uc_fsm_d = UC_CORE_RSP; + end else begin + if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + end + + req_op_i.is_amo_sc: begin + if (!req_uc_i && cfg_error_on_cacheable_amo_i) begin + rsp_error_set = 1'b1; + uc_fsm_d = UC_CORE_RSP; + end else begin + // Reset previous reservation (if any) + lrsc_uc_reset = 1'b1; + + // SC with valid reservation + if (lrsc_uc_hit) begin + if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + // SC with no valid reservation, thus respond with the failure code + else begin + uc_sc_retcode_d = AMO_SC_FAILURE; + uc_fsm_d = UC_CORE_RSP; + end + end + end + + default: begin + if (req_need_rsp_i) begin + rsp_error_set = 1'b1; + uc_fsm_d = UC_CORE_RSP; + end + end + endcase + end + end + // }}} + + // Wait for the write buffer to be empty + // {{{ + UC_WAIT_PENDING: begin + if (wbuf_empty_i && mshr_empty_i && rtab_empty_i && ctrl_empty_i) begin + uc_fsm_d = UC_MEM_REQ; + end else begin + uc_fsm_d = UC_WAIT_PENDING; + end + end + // }}} + + // Send request to memory + // {{{ + UC_MEM_REQ: begin + uc_fsm_d = UC_MEM_REQ; + + mem_resp_write_valid_d = 1'b0; + mem_resp_read_valid_d = 1'b0; + + case (1'b1) + req_op_q.is_ld, + req_op_q.is_amo_lr: begin + if (mem_req_read_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end + end + + req_op_q.is_st, + req_op_q.is_amo_sc, + req_op_q.is_amo_swap, + req_op_q.is_amo_add, + req_op_q.is_amo_and, 
+ req_op_q.is_amo_or, + req_op_q.is_amo_xor, + req_op_q.is_amo_max, + req_op_q.is_amo_maxu, + req_op_q.is_amo_min, + req_op_q.is_amo_minu: begin + if (mem_req_write_ready_i && mem_req_write_data_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end else if (mem_req_write_ready_i) begin + uc_fsm_d = UC_MEM_WDATA_REQ; + end else if (mem_req_write_data_ready_i) begin + uc_fsm_d = UC_MEM_W_REQ; + end + end + endcase + end + // }}} + + // Send write address + // {{{ + UC_MEM_W_REQ: begin + mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i; + mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i; + + if (mem_req_write_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end else begin + uc_fsm_d = UC_MEM_W_REQ; + end + end + // }}} + + // Send write data + // {{{ + UC_MEM_WDATA_REQ: begin + mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i; + mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i; + + if (mem_req_write_data_ready_i) begin + uc_fsm_d = UC_MEM_WAIT_RSP; + end else begin + uc_fsm_d = UC_MEM_WDATA_REQ; + end + end + // }}} + + // Wait for the response from the memory + // {{{ + UC_MEM_WAIT_RSP: begin + automatic bit rd_error; + automatic bit wr_error; + + uc_fsm_d = UC_MEM_WAIT_RSP; + mem_resp_write_valid_d = mem_resp_write_valid_q | mem_resp_write_valid_i; + mem_resp_read_valid_d = mem_resp_read_valid_q | mem_resp_read_valid_i; + + rd_error = mem_resp_read_valid_i && + ( mem_resp_read_i.mem_resp_r_error == HPDCACHE_MEM_RESP_NOK); + wr_error = mem_resp_write_valid_i && + (mem_resp_write_i.mem_resp_w_error == HPDCACHE_MEM_RESP_NOK); + rsp_error_set = req_need_rsp_q & (rd_error | wr_error); + + case (1'b1) + req_op_q.is_ld: begin + if (mem_resp_read_valid_i) begin + if (req_need_rsp_q) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_IDLE; + end + end + end + req_op_q.is_st: begin + if (mem_resp_write_valid_i) begin + if (req_need_rsp_q) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_IDLE; + end + end + end + req_op_q.is_amo_lr: begin + if (mem_resp_read_valid_i) begin + // set a new reservation + if (!rd_error) + begin + lrsc_uc_set = 1'b1; + lrsc_rsrv_addr_d = req_addr_q; + end + // in case of a memory error, do not make the reservation and + // invalidate an existing one (if valid) + else begin + lrsc_uc_reset = 1'b1; + end + + if (req_uc_q || rd_error) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_AMO_READ_DIR; + end + end + end + req_op_q.is_amo_sc: begin + if (mem_resp_write_valid_i) begin + automatic bit is_atomic; + + is_atomic = mem_resp_write_i.mem_resp_w_is_atomic && !wr_error; + uc_sc_retcode_d = is_atomic ? 
AMO_SC_SUCCESS : AMO_SC_FAILURE; + + if (req_uc_q || !is_atomic) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_AMO_READ_DIR; + end + end + end + req_op_q.is_amo_swap, + req_op_q.is_amo_add, + req_op_q.is_amo_and, + req_op_q.is_amo_or, + req_op_q.is_amo_xor, + req_op_q.is_amo_max, + req_op_q.is_amo_maxu, + req_op_q.is_amo_min, + req_op_q.is_amo_minu: begin + // wait for both old data and write acknowledged were received + if ((mem_resp_read_valid_i && mem_resp_write_valid_i) || + (mem_resp_read_valid_i && mem_resp_write_valid_q) || + (mem_resp_read_valid_q && mem_resp_write_valid_i)) + begin + if (req_uc_q || rsp_error_q || rd_error || wr_error) begin + uc_fsm_d = UC_CORE_RSP; + end else begin + uc_fsm_d = UC_AMO_READ_DIR; + end + end + end + endcase + end + // }}} + + // Send the response to the requester + // {{{ + UC_CORE_RSP: begin + if (core_rsp_ready_i) begin + rsp_error_rst = 1'b1; + uc_fsm_d = UC_IDLE; + end else begin + uc_fsm_d = UC_CORE_RSP; + end + end + // }}} + + // Check for a cache hit on the AMO target address + // {{{ + UC_AMO_READ_DIR: begin + uc_fsm_d = UC_AMO_WRITE_DATA; + end + // }}} + + // Write the locally computed AMO result in the cache + // {{{ + UC_AMO_WRITE_DATA: begin + uc_fsm_d = UC_CORE_RSP; + end + // }}} + endcase + end +// }}} + +// AMO unit +// {{{ + localparam hpdcache_uint AMO_WORD_INDEX_WIDTH = $clog2(HPDCACHE_REQ_DATA_WIDTH/64); + + generate + if (AMO_WORD_INDEX_WIDTH > 0) begin : amo_operand_mux_gen + hpdcache_mux #( + .NINPUT (HPDCACHE_REQ_DATA_WIDTH/64), + .DATA_WIDTH (64), + .ONE_HOT_SEL (1'b0) + ) amo_ld_data_mux_i ( + .data_i (rsp_rdata_q), + .sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]), + .data_o (amo_req_ld_data) + ); + + hpdcache_mux #( + .NINPUT (HPDCACHE_REQ_DATA_WIDTH/64), + .DATA_WIDTH (64), + .ONE_HOT_SEL (1'b0) + ) amo_st_data_mux_i ( + .data_i (req_data_q), + .sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]), + .data_o (amo_req_st_data) + ); + + hpdcache_mux #( + .NINPUT (HPDCACHE_REQ_DATA_WIDTH/64), + .DATA_WIDTH (8), + .ONE_HOT_SEL (1'b0) + ) amo_st_be_mux_i ( + .data_i (req_be_q), + .sel_i (req_addr_q[3 +: AMO_WORD_INDEX_WIDTH]), + .data_o (amo_st_be) + ); + + end else begin + assign amo_req_ld_data = rsp_rdata_q; + assign amo_req_st_data = req_data_q; + assign amo_st_be = req_be_q; + end + endgenerate + + assign amo_ld_data = prepare_amo_data_operand(amo_req_ld_data, req_size_q, + req_addr_q, amo_need_sign_extend(req_op_q)); + assign amo_st_data = prepare_amo_data_operand(amo_req_st_data, req_size_q, + req_addr_q, amo_need_sign_extend(req_op_q)); + + hpdcache_amo amo_unit_i ( + .ld_data_i (amo_ld_data), + .st_data_i (amo_st_data), + .op_i (req_op_q), + .result_o (amo_result) + ); + + assign dir_amo_match_o = (uc_fsm_q == UC_AMO_READ_DIR), + dir_amo_match_set_o = hpdcache_get_req_addr_set(req_addr_q), + dir_amo_match_tag_o = hpdcache_get_req_addr_tag(req_addr_q), + dir_amo_update_plru_o = dir_amo_match_o; + + assign data_amo_write_o = (uc_fsm_q == UC_AMO_WRITE_DATA), + data_amo_write_enable_o = |dir_amo_hit_way_i, + data_amo_write_set_o = hpdcache_get_req_addr_set(req_addr_q), + data_amo_write_size_o = req_size_q, + data_amo_write_word_o = hpdcache_get_req_addr_word(req_addr_q), + data_amo_write_data_o = prepare_amo_data_result(amo_result, req_size_q), + data_amo_write_be_o = amo_st_be; +// }}} + +// Core response outputs +// {{{ + assign req_ready_o = (uc_fsm_q == UC_IDLE), + core_rsp_valid_o = (uc_fsm_q == UC_CORE_RSP); +// }}} + +// Memory read request outputs +// {{{ + always_comb + begin : mem_req_read_comb + 
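        // Worked example for the AMO data path above (the values are purely
        // illustrative): a 32-bit AMOADD (req_size_q == 2) whose address has
        // bit [2] set operates on the upper word of the selected 64-bit lane,
        // and amo_need_sign_extend() marks it as signed, so
        //
        //   prepare_amo_data_operand(64'h8000_0001_DEAD_BEEF, 2, addr /*addr[2]=1*/, 1'b1)
        //     returns 64'hFFFF_FFFF_8000_0001    // upper word, sign-extended
        //
        // whereas an unsigned AMOMAXU of the same word would yield
        // 64'h0000_0000_8000_0001. The 64-bit result of hpdcache_amo is then
        // replicated into both 32-bit halves by prepare_amo_data_result(), and
        // the byte-enables in amo_st_be select the half actually written back.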
mem_req_read_o.mem_req_addr = req_addr_q; + mem_req_read_o.mem_req_len = 0; + mem_req_read_o.mem_req_size = req_size_q; + mem_req_read_o.mem_req_id = mem_read_id_i; + mem_req_read_o.mem_req_cacheable = 1'b0; + mem_req_read_o.mem_req_command = HPDCACHE_MEM_READ; + mem_req_read_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD; + + unique case (1'b1) + req_op_q.is_ld: begin + mem_req_read_valid_o = (uc_fsm_q == UC_MEM_REQ); + end + req_op_q.is_amo_lr: begin + mem_req_read_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_read_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_LDEX; + mem_req_read_valid_o = (uc_fsm_q == UC_MEM_REQ); + end + default: begin + mem_req_read_valid_o = 1'b0; + end + endcase + end +// }}} + +// Memory write request outputs +// {{{ + always_comb + begin : mem_req_write_comb + mem_req_write_data = req_data_q; + mem_req_write_o.mem_req_addr = req_addr_q; + mem_req_write_o.mem_req_len = 0; + mem_req_write_o.mem_req_size = req_size_q; + mem_req_write_o.mem_req_id = mem_write_id_i; + mem_req_write_o.mem_req_cacheable = 1'b0; + unique case (1'b1) + req_op_q.is_amo_sc: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_STEX; + end + req_op_q.is_amo_swap: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SWAP; + end + req_op_q.is_amo_add: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD; + end + req_op_q.is_amo_and: begin + mem_req_write_data = ~req_data_q; + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_CLR; + end + req_op_q.is_amo_or: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SET; + end + req_op_q.is_amo_xor: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_EOR; + end + req_op_q.is_amo_max: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SMAX; + end + req_op_q.is_amo_maxu: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_UMAX; + end + req_op_q.is_amo_min: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_SMIN; + end + req_op_q.is_amo_minu: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_ATOMIC; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_UMIN; + end + default: begin + mem_req_write_o.mem_req_command = HPDCACHE_MEM_WRITE; + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD; + end + endcase + + unique case (uc_fsm_q) + UC_MEM_REQ: begin + unique case (1'b1) + req_op_q.is_st, + req_op_q.is_amo_sc, + req_op_q.is_amo_swap, + req_op_q.is_amo_add, + req_op_q.is_amo_and, + req_op_q.is_amo_or, + req_op_q.is_amo_xor, + req_op_q.is_amo_max, + req_op_q.is_amo_maxu, + req_op_q.is_amo_min, + req_op_q.is_amo_minu: begin + mem_req_write_data_valid_o = 1'b1; + mem_req_write_valid_o = 1'b1; + end + + default: begin + mem_req_write_data_valid_o = 1'b0; + mem_req_write_valid_o = 1'b0; + end + endcase + end + + UC_MEM_W_REQ: begin + mem_req_write_valid_o = 1'b1; + mem_req_write_data_valid_o = 1'b0; + end + + UC_MEM_WDATA_REQ: begin + mem_req_write_valid_o = 1'b0; + mem_req_write_data_valid_o = 1'b1; + end + + default: begin + mem_req_write_valid_o = 1'b0; + 
mem_req_write_data_valid_o = 1'b0; + end + endcase + end + + generate + // memory data width is bigger than the width of the core's interface + if (MEM_REQ_RATIO > 1) begin : mem_req_data_gen + // replicate data + assign mem_req_write_data_o.mem_req_w_data = {MEM_REQ_RATIO{mem_req_write_data}}; + + // demultiplex the byte-enable + hpdcache_demux #( + .NOUTPUT (MEM_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH/8) + ) mem_write_be_demux_i ( + .data_i (req_be_q), + .sel_i (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]), + .data_o (mem_req_write_data_o.mem_req_w_be) + ); + end + + // memory data width is equal to the width of the core's interface + else begin + assign mem_req_write_data_o.mem_req_w_data = mem_req_write_data; + assign mem_req_write_data_o.mem_req_w_be = req_be_q; + end + + assign mem_req_write_data_o.mem_req_w_last = 1'b1; + endgenerate +// }}} + +// Response handling +// {{{ + logic [63:0] sc_retcode; + logic [63:0] sc_rdata; + + assign sc_retcode = {{63{1'b0}}, uc_sc_retcode_q}, + sc_rdata = prepare_amo_data_result(sc_retcode, req_size_q); + + assign core_rsp_o.rdata = req_op_q.is_amo_sc ? {HPDCACHE_REQ_WORDS{sc_rdata}} : rsp_rdata_q, + core_rsp_o.sid = req_sid_q, + core_rsp_o.tid = req_tid_q, + core_rsp_o.error = rsp_error_q, + core_rsp_o.aborted = 1'b0; + + // Resize the memory response data to the core response width + generate + // memory data width is bigger than the width of the core's interface + if (MEM_REQ_RATIO > 1) begin : core_rsp_data_gen + hpdcache_mux #( + .NINPUT (MEM_REQ_RATIO), + .DATA_WIDTH (HPDCACHE_REQ_DATA_WIDTH) + ) data_read_rsp_mux_i( + .data_i (mem_resp_read_i.mem_resp_r_data), + .sel_i (req_addr_q[HPDCACHE_REQ_BYTE_OFFSET_WIDTH +: MEM_REQ_WORD_INDEX_WIDTH]), + .data_o (rsp_rdata_d) + ); + end + + // memory data width is equal to the width of the core's interface + else begin + assign rsp_rdata_d = mem_resp_read_i.mem_resp_r_data; + end + endgenerate + + // This FSM is always ready to accept the response + assign mem_resp_read_ready_o = 1'b1, + mem_resp_write_ready_o = 1'b1; +// }}} + +// Set cache request registers +// {{{ + always_ff @(posedge clk_i) + begin : req_ff + if (req_valid_i && req_ready_o) begin + req_op_q <= req_op_i; + req_addr_q <= req_addr_i; + req_size_q <= req_size_i; + req_data_q <= req_data_i; + req_be_q <= req_be_i; + req_uc_q <= req_uc_i; + req_sid_q <= req_sid_i; + req_tid_q <= req_tid_i; + req_need_rsp_q <= req_need_rsp_i; + end + end +// }}} + +// Uncacheable request FSM set state +// {{{ + logic lrsc_rsrv_valid_set, lrsc_rsrv_valid_reset; + + assign lrsc_rsrv_valid_set = lrsc_uc_set, + lrsc_rsrv_valid_reset = lrsc_uc_reset | lrsc_snoop_reset; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : uc_fsm_ff + if (!rst_ni) begin + uc_fsm_q <= UC_IDLE; + lrsc_rsrv_valid_q <= 1'b0; + end else begin + uc_fsm_q <= uc_fsm_d; + lrsc_rsrv_valid_q <= (~lrsc_rsrv_valid_q & lrsc_rsrv_valid_set ) | + ( lrsc_rsrv_valid_q & ~lrsc_rsrv_valid_reset); + end + end + + always_ff @(posedge clk_i) + begin : uc_amo_ff + lrsc_rsrv_addr_q <= lrsc_rsrv_addr_d; + uc_sc_retcode_q <= uc_sc_retcode_d; + end +// }}} + +// Response registers +// {{{ + always_ff @(posedge clk_i) + begin + if (mem_resp_read_valid_i) begin + rsp_rdata_q <= rsp_rdata_d; + end + mem_resp_write_valid_q <= mem_resp_write_valid_d; + mem_resp_read_valid_q <= mem_resp_read_valid_d; + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + rsp_error_q <= 1'b0; + end else begin + rsp_error_q <= (~rsp_error_q & 
rsp_error_set) | + ( rsp_error_q & ~rsp_error_rst); + end + end +// }}} + +// Assertions +// {{{ +// pragma translate_off + assert property (@(posedge clk_i) + (req_valid_i && req_op_i.is_ld) -> req_uc_i) else + $error("uc_handler: unexpected load request on cacheable region"); + + assert property (@(posedge clk_i) + (req_valid_i && req_op_i.is_st) -> req_uc_i) else + $error("uc_handler: unexpected store request on cacheable region"); + + assert property (@(posedge clk_i) + (req_valid_i && (req_op_i.is_amo_lr || + req_op_i.is_amo_sc || + req_op_i.is_amo_swap || + req_op_i.is_amo_add || + req_op_i.is_amo_and || + req_op_i.is_amo_or || + req_op_i.is_amo_xor || + req_op_i.is_amo_max || + req_op_i.is_amo_maxu || + req_op_i.is_amo_min || + req_op_i.is_amo_minu )) -> req_need_rsp_i) else + $error("uc_handler: amo requests shall need a response"); + + assert property (@(posedge clk_i) + (req_valid_i && (req_op_i.is_amo_lr || + req_op_i.is_amo_sc || + req_op_i.is_amo_swap || + req_op_i.is_amo_add || + req_op_i.is_amo_and || + req_op_i.is_amo_or || + req_op_i.is_amo_xor || + req_op_i.is_amo_max || + req_op_i.is_amo_maxu || + req_op_i.is_amo_min || + req_op_i.is_amo_minu )) -> (req_size_i inside {2,3})) else + $error("uc_handler: amo requests shall be 4 or 8 bytes wide"); + + assert property (@(posedge clk_i) + (mem_resp_write_valid_i || mem_resp_read_valid_i) -> (uc_fsm_q == UC_MEM_WAIT_RSP)) else + $error("uc_handler: unexpected response from memory"); +// pragma translate_on +// }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_wbuf.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_wbuf.sv new file mode 100644 index 00000000000..e6cf560dc36 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_wbuf.sv @@ -0,0 +1,685 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Write Buffer + * History : + */ +/* + * Improvements + * ================= + * TODO Use a feedthrough FIFO for the data pointers in the send data interface. + * Currently, there is always an one-cycle latency between the write + * and the availability of the data. 
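 *
 * Behaviour sketch (the line size and addresses below are illustrative, not
 * normative): writes falling in the same naturally-aligned buffer line
 * coalesce into a single directory slot while that slot is OPEN. With a
 * 64-byte buffer line, the two byte stores
 *
 *     write_addr_i = 'h1000, write_be_i selecting byte 0
 *     write_addr_i = 'h1008, write_be_i selecting byte 8
 *
 * share the same tag (address >> 6), so the second one merges its data and
 * byte-enables into the slot opened by the first, and a single memory write
 * of that line is emitted once the slot becomes PEND and both send FIFOs
 * accept it.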
+ */ +module hpdcache_wbuf + // Parameters + // {{{ +#( + // Number of entries in the directory part of the Write Buffer + parameter int unsigned WBUF_DIR_ENTRIES = 0, + // Number of entries in the data part of the Write Buffer + parameter int unsigned WBUF_DATA_ENTRIES = 0, + // Width in bits of the write words + parameter int unsigned WBUF_WORD_WIDTH = 0, + // Number of words per line in the write buffer + parameter int unsigned WBUF_WORDS = 0, + // Width in bits of the physical address + parameter int unsigned WBUF_PA_WIDTH = 0, + // Maximum value of the time counter + parameter int unsigned WBUF_TIMECNT_MAX = 8, + // Number of most significant bits to check for read conflicts + parameter int unsigned WBUF_READ_MATCH_WIDTH = 0, + + localparam int unsigned WBUF_OFFSET_WIDTH = $clog2((WBUF_WORD_WIDTH*WBUF_WORDS)/8), + localparam int unsigned WBUF_TAG_WIDTH = WBUF_PA_WIDTH - WBUF_OFFSET_WIDTH, + localparam int unsigned WBUF_WORD_OFFSET = $clog2(WBUF_WORD_WIDTH/8), + localparam int unsigned WBUF_DATA_PTR_WIDTH = $clog2(WBUF_DATA_ENTRIES), + localparam int unsigned WBUF_DIR_PTR_WIDTH = $clog2(WBUF_DIR_ENTRIES), + localparam int unsigned WBUF_TIMECNT_WIDTH = $clog2(WBUF_TIMECNT_MAX), + localparam type wbuf_addr_t = logic unsigned [ WBUF_PA_WIDTH-1:0], + localparam type wbuf_dir_ptr_t = logic unsigned [ WBUF_DIR_PTR_WIDTH-1:0], + localparam type wbuf_data_ptr_t = logic unsigned [ WBUF_DATA_PTR_WIDTH-1:0], + localparam type wbuf_data_t = logic [ WBUF_WORD_WIDTH-1:0], + localparam type wbuf_be_t = logic [ WBUF_WORD_WIDTH/8-1:0], + localparam type wbuf_data_buf_t = wbuf_data_t [ WBUF_WORDS-1:0], + localparam type wbuf_be_buf_t = wbuf_be_t [ WBUF_WORDS-1:0], + localparam type wbuf_tag_t = logic unsigned [ WBUF_TAG_WIDTH-1:0], + localparam type wbuf_match_t = logic unsigned [WBUF_READ_MATCH_WIDTH-1:0], + localparam type wbuf_timecnt_t = logic unsigned [ WBUF_TIMECNT_WIDTH-1:0] +) + // }}} + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, + output logic full_o, + input logic flush_all_i, + + // Configuration signals + // Timer threshold + input wbuf_timecnt_t cfg_threshold_i, + // Reset timer on write + input logic cfg_reset_timecnt_on_write_i, + // Sequentialize write-after-write hazards + input logic cfg_sequential_waw_i, + // Inhibit write coalescing + input logic cfg_inhibit_write_coalescing_i, + + // Write interface + input logic write_i, + output logic write_ready_o, + input wbuf_addr_t write_addr_i, + input wbuf_data_t write_data_i, + input wbuf_be_t write_be_i, // byte-enable + input logic write_uc_i, // uncacheable write + + // Read hit interface + input wbuf_addr_t read_addr_i, + output logic read_hit_o, + input logic read_flush_hit_i, + + // Replay hit interface + input wbuf_addr_t replay_addr_i, + input logic replay_is_read_i, + output logic replay_open_hit_o, + output logic replay_pend_hit_o, + output logic replay_sent_hit_o, + output logic replay_not_ready_o, + + // Send interface + input logic send_meta_ready_i, + output logic send_meta_valid_o, + output wbuf_addr_t send_addr_o, + output wbuf_dir_ptr_t send_id_o, + output logic send_uc_o, + + input logic send_data_ready_i, + output logic send_data_valid_o, + output wbuf_addr_t send_data_tag_o, + output wbuf_data_buf_t send_data_o, + output wbuf_be_buf_t send_be_o, + + // Acknowledge interface + input logic ack_i, + input wbuf_dir_ptr_t ack_id_i, + input logic ack_error_i +); + // }}} + + // Definition of constants, types and functions + 
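    // The directory slots declared in this section follow a simple lifecycle;
    // the summary below is a condensed view of the update logic further down,
    // not additional RTL:
    //
    //   WBUF_FREE --(accepted write)-------------------------------> WBUF_OPEN
    //   WBUF_FREE --(write with threshold==0, uncacheable write,
    //                flush_all_i, or coalescing inhibited)----------> WBUF_PEND
    //   WBUF_OPEN --(timeout, read/flush hit, coalescing inhibited,
    //                or flush_all_i)--------------------------------> WBUF_PEND
    //   WBUF_PEND --(meta and data send FIFOs both accept)----------> WBUF_SENT
    //   WBUF_SENT --(memory write acknowledge, ack_i with
    //                matching ack_id_i)-----------------------------> WBUF_FREE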
// {{{ + localparam int WBUF_SEND_FIFO_DEPTH = WBUF_DATA_ENTRIES; + + typedef logic unsigned [31:0] wbuf_uint; + + typedef enum logic [1:0] { + WBUF_FREE = 2'b00, // unused/free slot + WBUF_OPEN = 2'b01, // there are pending writes in this slot + WBUF_PEND = 2'b10, // the slot is waiting to be sent + WBUF_SENT = 2'b11 // the slot is sent and waits for the memory acknowledge + } wbuf_state_e; + + typedef struct packed { + wbuf_data_ptr_t ptr; + wbuf_timecnt_t cnt; + wbuf_tag_t tag; + logic uc; + } wbuf_dir_entry_t; + + typedef struct packed { + wbuf_data_buf_t data; + wbuf_be_buf_t be; + } wbuf_data_entry_t; + + typedef struct packed { + wbuf_data_ptr_t send_data_ptr; + wbuf_tag_t send_data_tag; + } wbuf_send_data_t; + + typedef struct packed { + wbuf_tag_t send_meta_tag; + wbuf_dir_ptr_t send_meta_id; + logic send_meta_uc; + } wbuf_send_meta_t; + + function automatic wbuf_dir_ptr_t wbuf_dir_find_next( + input wbuf_dir_ptr_t curr_ptr, + input wbuf_state_e [WBUF_DIR_ENTRIES-1:0] dir_state, + input wbuf_state_e state); + automatic wbuf_dir_ptr_t next_ptr; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + next_ptr = wbuf_dir_ptr_t'((i + int'(curr_ptr) + 1) % WBUF_DIR_ENTRIES); + if (dir_state[next_ptr] == state) begin + return next_ptr; + end + end + return curr_ptr; + endfunction + + function automatic wbuf_data_ptr_t wbuf_data_find_next( + input wbuf_data_ptr_t curr_ptr, + input logic [WBUF_DATA_ENTRIES-1:0] data_valid, + input logic state); + automatic wbuf_data_ptr_t next_ptr; + for (int unsigned i = 0; i < WBUF_DATA_ENTRIES; i++) begin + next_ptr = wbuf_data_ptr_t'((i + int'(curr_ptr) + 1) % WBUF_DATA_ENTRIES); + if (data_valid[next_ptr] == state) begin + return next_ptr; + end + end + return curr_ptr; + endfunction + + function automatic void wbuf_data_write( + output wbuf_data_buf_t wbuf_ret_data, + output wbuf_be_buf_t wbuf_ret_be, + input wbuf_data_buf_t wbuf_old_data, + input wbuf_be_buf_t wbuf_old_be, + input wbuf_data_buf_t wbuf_new_data, + input wbuf_be_buf_t wbuf_new_be); + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + for (int unsigned b = 0; b < WBUF_WORD_WIDTH/8; b++) begin + wbuf_ret_data[w][b*8 +: 8] = wbuf_new_be[w][b] ? 
+ wbuf_new_data[w][b*8 +: 8] : + wbuf_old_data[w][b*8 +: 8]; + end + wbuf_ret_be[w] = wbuf_old_be[w] | wbuf_new_be[w]; + end + endfunction + + function automatic wbuf_match_t wbuf_tag_to_match_addr(wbuf_tag_t tag); + return tag[WBUF_TAG_WIDTH - 1:WBUF_TAG_WIDTH - WBUF_READ_MATCH_WIDTH]; + endfunction + // }}} + + // Definition of internal wires and registers + // {{{ + wbuf_state_e [ WBUF_DIR_ENTRIES-1:0] wbuf_dir_state_q, wbuf_dir_state_d; + wbuf_dir_entry_t [ WBUF_DIR_ENTRIES-1:0] wbuf_dir_q, wbuf_dir_d; + logic [WBUF_DATA_ENTRIES-1:0] wbuf_data_valid_q, wbuf_data_valid_d; + wbuf_data_entry_t [WBUF_DATA_ENTRIES-1:0] wbuf_data_q, wbuf_data_d; + + wbuf_dir_ptr_t wbuf_dir_free_ptr_q, wbuf_dir_free_ptr_d; + logic wbuf_dir_free; + wbuf_dir_ptr_t wbuf_dir_send_ptr_q, wbuf_dir_send_ptr_d; + wbuf_data_ptr_t wbuf_data_free_ptr_q, wbuf_data_free_ptr_d; + logic wbuf_data_free; + + logic wbuf_write_free; + logic wbuf_write_hit_open; + logic wbuf_write_hit_pend; + logic wbuf_write_hit_sent; + wbuf_dir_ptr_t wbuf_write_hit_open_dir_ptr; + wbuf_dir_ptr_t wbuf_write_hit_pend_dir_ptr; + + logic send_meta_valid; + logic send_meta_ready; + wbuf_send_meta_t send_meta_wdata, send_meta_rdata; + + logic fifo_send_data_wok; + logic fifo_send_data_w; + wbuf_send_data_t fifo_send_data_d; + logic fifo_send_data_r; + logic fifo_send_data_rok; + wbuf_send_data_t fifo_send_data_q; + + wbuf_tag_t write_tag; + wbuf_data_buf_t write_data; + wbuf_be_buf_t write_be; + + logic [WBUF_DIR_ENTRIES-1:0] replay_match; + logic [WBUF_DIR_ENTRIES-1:0] replay_open_hit; + logic [WBUF_DIR_ENTRIES-1:0] replay_pend_hit; + logic [WBUF_DIR_ENTRIES-1:0] replay_sent_hit; + + genvar gen_i; + // }}} + + // Global control signals + // {{{ + always_comb + begin : empty_comb + empty_o = 1'b1; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + empty_o &= (wbuf_dir_state_q[i] == WBUF_FREE); + end + end + + always_comb + begin : full_comb + full_o = 1'b1; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + full_o &= (wbuf_dir_state_q[i] != WBUF_FREE); + end + end + // }}} + + // Write control + // {{{ + assign write_tag = write_addr_i[WBUF_PA_WIDTH-1:WBUF_OFFSET_WIDTH]; + + always_comb + begin : wbuf_write_data_comb + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + write_data[w] = write_data_i; + end + end + + generate + if (WBUF_OFFSET_WIDTH > WBUF_WORD_OFFSET) begin : wbuf_write_be_gt_gen + always_comb + begin : wbuf_write_be_comb + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + if (w == int'(write_addr_i[WBUF_OFFSET_WIDTH-1:WBUF_WORD_OFFSET])) begin + write_be[w] = write_be_i; + end else begin + write_be[w] = '0; + end + end + end + end else begin : wbuf_write_be_le_gen + always_comb + begin : wbuf_write_be_comb + for (int unsigned w = 0; w < WBUF_WORDS; w++) begin + write_be[w] = write_be_i; + end + end + end + endgenerate + + always_comb + begin : wbuf_free_comb + wbuf_dir_free_ptr_d = wbuf_dir_free_ptr_q; + if (ack_i) begin + wbuf_dir_free_ptr_d = ack_id_i; + end else if (write_i && wbuf_write_free) begin + wbuf_dir_free_ptr_d = wbuf_dir_find_next(wbuf_dir_free_ptr_q, wbuf_dir_state_q, WBUF_FREE); + end + + wbuf_data_free_ptr_d = wbuf_data_free_ptr_q; + if (send_data_valid_o && send_data_ready_i) begin + wbuf_data_free_ptr_d = fifo_send_data_q.send_data_ptr; + end else if (write_i && wbuf_write_free) begin + wbuf_data_free_ptr_d = wbuf_data_find_next(wbuf_data_free_ptr_q, wbuf_data_valid_q, 1'b0); + end + end + + assign wbuf_dir_free = (wbuf_dir_state_q[wbuf_dir_free_ptr_q] == WBUF_FREE); + assign 
wbuf_data_free = ~wbuf_data_valid_q[wbuf_data_free_ptr_q]; + + always_comb + begin : wbuf_write_hit_comb + wbuf_write_hit_open = 1'b0; + wbuf_write_hit_pend = 1'b0; + wbuf_write_hit_sent = 1'b0; + + wbuf_write_hit_open_dir_ptr = 0; + wbuf_write_hit_pend_dir_ptr = 0; + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + if (wbuf_dir_q[i].tag == write_tag) begin + unique case (wbuf_dir_state_q[i]) + WBUF_OPEN: begin + wbuf_write_hit_open = 1'b1; + wbuf_write_hit_open_dir_ptr = wbuf_dir_ptr_t'(i); + end + WBUF_PEND: begin + wbuf_write_hit_pend = 1'b1; + wbuf_write_hit_pend_dir_ptr = wbuf_dir_ptr_t'(i); + end + WBUF_SENT: begin + wbuf_write_hit_sent = 1'b1; + end + default: begin + /* do nothing */ + end + endcase + end + end + end + + // Check if there is a match between the read address and the tag of one + // of the used slots in the write buffer directory + always_comb + begin : read_hit_comb + automatic logic [WBUF_DIR_ENTRIES-1:0] read_hit; + + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + read_hit[i] = 1'b0; + unique case (wbuf_dir_state_q[i]) + WBUF_OPEN, WBUF_PEND, WBUF_SENT: begin + automatic wbuf_addr_t wbuf_addr; + automatic wbuf_match_t wbuf_tag; + automatic wbuf_match_t read_tag; + + wbuf_addr = wbuf_addr_t'(wbuf_dir_q[i].tag) << WBUF_OFFSET_WIDTH; + read_tag = read_addr_i[WBUF_PA_WIDTH-1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]; + wbuf_tag = wbuf_addr [WBUF_PA_WIDTH-1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]; + read_hit[i] = (read_tag == wbuf_tag) ? 1'b1 : 1'b0; + end + default: begin + /* do nothing */ + end + endcase + end + + read_hit_o = |read_hit; + end + + // Check if there is a match between the replay address and the tag of one + // of the used slots in the write buffer directory + generate + for (gen_i = 0; gen_i < WBUF_DIR_ENTRIES; gen_i++) begin : replay_match_gen + assign replay_match[gen_i] = replay_is_read_i ? + /* replay is read: compare address block tag (e.g. 
cache line) */ + (wbuf_tag_to_match_addr(wbuf_dir_q[gen_i].tag) == + replay_addr_i[WBUF_PA_WIDTH - 1:WBUF_PA_WIDTH - WBUF_READ_MATCH_WIDTH]) : + /* replay is write: compare wbuf tag */ + (wbuf_dir_q[gen_i].tag == + replay_addr_i[WBUF_PA_WIDTH - 1:WBUF_PA_WIDTH - WBUF_TAG_WIDTH]); + + assign replay_open_hit[gen_i] = + replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_OPEN); + assign replay_pend_hit[gen_i] = + replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_PEND); + assign replay_sent_hit[gen_i] = + replay_match[gen_i] && (wbuf_dir_state_q[gen_i] == WBUF_SENT); + end + endgenerate + + assign replay_open_hit_o = |replay_open_hit, + replay_pend_hit_o = |replay_pend_hit, + replay_sent_hit_o = |replay_sent_hit; + + always_comb + begin : replay_wbuf_not_ready_comb + replay_not_ready_o = 1'b0; + if (replay_pend_hit_o) begin + replay_not_ready_o = 1'b1; + end else if (replay_sent_hit_o && cfg_sequential_waw_i) begin + replay_not_ready_o = 1'b1; + end else if (!replay_open_hit_o && (!wbuf_dir_free || !wbuf_data_free)) begin + replay_not_ready_o = 1'b1; + end + end + + assign wbuf_write_free = + wbuf_dir_free + & wbuf_data_free + & ~wbuf_write_hit_open + & ~wbuf_write_hit_pend + & ~(wbuf_write_hit_sent & cfg_sequential_waw_i); + + assign write_ready_o = wbuf_write_free + | ((wbuf_write_hit_open | wbuf_write_hit_pend) + & ~cfg_inhibit_write_coalescing_i); + // }}} + + // Update control + // {{{ + always_comb + begin : wbuf_update_comb + automatic bit timeout; + automatic bit write_hit; + automatic bit read_hit; + automatic bit match_open_ptr; + automatic bit match_pend_ptr; + automatic bit match_free; + automatic bit send; + + timeout = 1'b0; + write_hit = 1'b0; + read_hit = 1'b0; + match_open_ptr = 1'b0; + match_pend_ptr = 1'b0; + match_free = 1'b0; + send = 1'b0; + + wbuf_dir_state_d = wbuf_dir_state_q; + wbuf_dir_d = wbuf_dir_q; + wbuf_data_d = wbuf_data_q; + + fifo_send_data_w = 1'b0; + send_meta_valid = 1'b0; + + for (int unsigned i = 0; i < WBUF_DIR_ENTRIES; i++) begin + case (wbuf_dir_state_q[i]) + WBUF_FREE: begin + match_free = wbuf_write_free && (i == int'(wbuf_dir_free_ptr_q)); + + if (write_i && match_free) begin + send = (cfg_threshold_i == 0) + | write_uc_i + | flush_all_i + | cfg_inhibit_write_coalescing_i; + + wbuf_dir_state_d[i] = send ? 
WBUF_PEND : WBUF_OPEN; + wbuf_dir_d[i].tag = write_tag; + wbuf_dir_d[i].cnt = 0; + wbuf_dir_d[i].ptr = wbuf_data_free_ptr_q; + wbuf_dir_d[i].uc = write_uc_i; + + wbuf_data_write( + wbuf_data_d[wbuf_data_free_ptr_q].data, + wbuf_data_d[wbuf_data_free_ptr_q].be, + '0, + '0, + write_data, + write_be + ); + end + end + + WBUF_OPEN: begin + match_open_ptr = (i == int'(wbuf_write_hit_open_dir_ptr)); + timeout = (wbuf_dir_q[i].cnt == (cfg_threshold_i - 1)); + read_hit = read_flush_hit_i & wbuf_write_hit_open & match_open_ptr; + write_hit = write_i + & wbuf_write_hit_open + & match_open_ptr + & ~cfg_inhibit_write_coalescing_i; + + if (!flush_all_i) begin + if (write_hit && cfg_reset_timecnt_on_write_i) begin + timeout = 1'b0; + wbuf_dir_d[i].cnt = 0; + end else if (!timeout) begin + wbuf_dir_d[i].cnt = wbuf_dir_q[i].cnt + 1; + end + + if (read_hit | timeout | cfg_inhibit_write_coalescing_i) begin + wbuf_dir_state_d[i] = WBUF_PEND; + end + end else begin + wbuf_dir_state_d[i] = WBUF_PEND; + end + + if (write_hit) begin + wbuf_data_write( + wbuf_data_d[wbuf_dir_q[i].ptr].data, + wbuf_data_d[wbuf_dir_q[i].ptr].be, + wbuf_data_q[wbuf_dir_q[i].ptr].data, + wbuf_data_q[wbuf_dir_q[i].ptr].be, + write_data, + write_be + ); + end + end + + WBUF_PEND: begin + match_pend_ptr = (i == int'(wbuf_write_hit_pend_dir_ptr)); + write_hit = write_i + & wbuf_write_hit_pend + & match_pend_ptr + & ~cfg_inhibit_write_coalescing_i; + + if (write_hit) begin + wbuf_data_write( + wbuf_data_d[wbuf_dir_q[i].ptr].data, + wbuf_data_d[wbuf_dir_q[i].ptr].be, + wbuf_data_q[wbuf_dir_q[i].ptr].data, + wbuf_data_q[wbuf_dir_q[i].ptr].be, + write_data, + write_be + ); + end + + if (i == int'(wbuf_dir_send_ptr_q)) begin + fifo_send_data_w = send_meta_ready; + send_meta_valid = fifo_send_data_wok; + if (send_meta_ready && fifo_send_data_wok) begin + wbuf_dir_state_d[i] = WBUF_SENT; + end + end + end + + WBUF_SENT: begin + if (ack_i && (i == int'(ack_id_i))) begin + wbuf_dir_state_d[i] = WBUF_FREE; + end + end + endcase + end + end + + always_comb + begin : wbuf_data_valid_comb + wbuf_data_valid_d = wbuf_data_valid_q; + + // allocate a free data buffer on new write + if (write_i && wbuf_write_free) begin + wbuf_data_valid_d[wbuf_data_free_ptr_q] = 1'b1; + end + + // de-allocate a data buffer as soon as it is send + if (send_data_valid_o && send_data_ready_i) begin + wbuf_data_valid_d[fifo_send_data_q.send_data_ptr] = 1'b0; + end + end + // }}} + + // Send control + // {{{ + // Data channel + assign fifo_send_data_d.send_data_ptr = wbuf_dir_q[wbuf_dir_send_ptr_q].ptr, + fifo_send_data_d.send_data_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag; + + hpdcache_fifo_reg #( + .FIFO_DEPTH (WBUF_SEND_FIFO_DEPTH), + .fifo_data_t (wbuf_send_data_t) + ) send_data_ptr_fifo_i ( + .clk_i, + .rst_ni, + .w_i (fifo_send_data_w), + .wok_o (fifo_send_data_wok), + .wdata_i (fifo_send_data_d), + .r_i (fifo_send_data_r), + .rok_o (fifo_send_data_rok), + .rdata_o (fifo_send_data_q) + ); + + assign fifo_send_data_r = send_data_ready_i, + send_data_valid_o = fifo_send_data_rok, + send_data_tag_o = wbuf_addr_t'(fifo_send_data_q.send_data_tag), + send_data_o = wbuf_data_q[fifo_send_data_q.send_data_ptr].data, + send_be_o = wbuf_data_q[fifo_send_data_q.send_data_ptr].be; + + // Meta-data channel + hpdcache_fifo_reg #( + .FIFO_DEPTH (WBUF_SEND_FIFO_DEPTH), + .fifo_data_t (wbuf_send_meta_t) + ) send_meta_fifo_i ( + .clk_i, + .rst_ni, + .w_i (send_meta_valid), + .wok_o (send_meta_ready), + .wdata_i (send_meta_wdata), + .r_i (send_meta_ready_i), + .rok_o (send_meta_valid_o), + 
.rdata_o (send_meta_rdata) + ); + + assign send_meta_wdata.send_meta_tag = wbuf_dir_q[wbuf_dir_send_ptr_q].tag, + send_meta_wdata.send_meta_id = wbuf_dir_send_ptr_q, + send_meta_wdata.send_meta_uc = wbuf_dir_q[wbuf_dir_send_ptr_q].uc; + + assign send_addr_o = { send_meta_rdata.send_meta_tag, {WBUF_OFFSET_WIDTH{1'b0}} }, + send_id_o = send_meta_rdata.send_meta_id, + send_uc_o = send_meta_rdata.send_meta_uc; + + // Send pointer + always_comb + begin : wbuf_send_comb + wbuf_dir_send_ptr_d = wbuf_dir_find_next(wbuf_dir_send_ptr_q, wbuf_dir_state_q, WBUF_PEND); + if (wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND) begin + if (!send_meta_valid || !send_meta_ready) begin + wbuf_dir_send_ptr_d = wbuf_dir_send_ptr_q; + end + end + end + // }}} + + // Internal state assignment + // {{{ + always_ff @(posedge clk_i) wbuf_data_q <= wbuf_data_d; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : wbuf_state_ff + if (!rst_ni) begin + wbuf_dir_q <= '0; + wbuf_dir_state_q <= {WBUF_DIR_ENTRIES{WBUF_FREE}}; + wbuf_data_valid_q <= '0; + wbuf_dir_free_ptr_q <= 0; + wbuf_dir_send_ptr_q <= 0; + wbuf_data_free_ptr_q <= 0; + end else begin + wbuf_dir_q <= wbuf_dir_d; + wbuf_dir_state_q <= wbuf_dir_state_d; + wbuf_data_valid_q <= wbuf_data_valid_d; + wbuf_dir_free_ptr_q <= wbuf_dir_free_ptr_d; + wbuf_dir_send_ptr_q <= wbuf_dir_send_ptr_d; + wbuf_data_free_ptr_q <= wbuf_data_free_ptr_d; + end + end + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial assert(WBUF_WORDS inside {1, 2, 4, 8, 16}) else + $error("WBUF: width of data buffers must be a power of 2"); + ack_sent_assert: assert property (@(posedge clk_i) + (ack_i -> (wbuf_dir_state_q[ack_id_i] == WBUF_SENT))) else + $error("WBUF: acknowledging a not SENT slot"); + send_pend_assert: assert property (@(posedge clk_i) + (send_meta_valid -> (wbuf_dir_state_q[wbuf_dir_send_ptr_q] == WBUF_PEND))) else + $error("WBUF: sending a not PEND slot"); + send_valid_data_assert: assert property (@(posedge clk_i) + (send_data_valid_o -> (wbuf_data_valid_q[fifo_send_data_q.send_data_ptr] == 1'b1))) else + $error("WBUF: sending a not valid data"); + // pragma translate_on + // }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_wbuf_wrapper.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_wbuf_wrapper.sv new file mode 100644 index 00000000000..abb4e211d3c --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hpdcache_wbuf_wrapper.sv @@ -0,0 +1,227 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : HPDcache Write Buffer Wrapper + * History : + */ +/* This wrapper adapts the send interface of the write buffer to the memory + * interface of the cache. 
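 *
 * When the memory interface is wider than a write-buffer line, the wrapper
 * replicates the line data across the whole bus width and steers the
 * byte-enables to the addressed lane. A rough, illustrative sizing (these
 * widths are assumptions, not the actual CVA6 configuration): with
 * HPDCACHE_WBUF_DATA_WIDTH = 128 and HPDcacheMemDataWidth = 512,
 * WBUF_MEM_DATA_RATIO = 4, so
 *
 *     mem_req_w_data = {4{send_data}}   // same 128 bits repeated in each lane
 *     mem_req_w_be   = send_be demultiplexed into the lane selected by
 *                      send_data_tag[1:0] (the other lanes get all-zero BEs)
 *
 * so only the addressed 128-bit lane of the 512-bit word is actually written.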
+ */ +module hpdcache_wbuf_wrapper +import hpdcache_pkg::*; + // Parameters + // {{{ +#( + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_w_t = logic, + + localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0] +) + // }}} + // Ports + // {{{ +( + // Clock and reset signals + input logic clk_i, + input logic rst_ni, + + // Global control signals + output logic empty_o, + output logic full_o, + input logic flush_all_i, + + // Configuration signals + // Timer threshold + input wbuf_timecnt_t cfg_threshold_i, + // Reset timer on write + input logic cfg_reset_timecnt_on_write_i, + // Sequentialize write-after-write hazards + input logic cfg_sequential_waw_i, + // Inhibit write coalescing + input logic cfg_inhibit_write_coalescing_i, + + // Write interface + input logic write_i, + output logic write_ready_o, + input wbuf_addr_t write_addr_i, + input wbuf_data_t write_data_i, + input wbuf_be_t write_be_i, // byte-enable + input logic write_uc_i, // uncacheable write + + // Read hit interface + input wbuf_addr_t read_addr_i, + output logic read_hit_o, + input logic read_flush_hit_i, + + // Replay hit interface + input wbuf_addr_t replay_addr_i, + input logic replay_is_read_i, + output logic replay_open_hit_o, + output logic replay_pend_hit_o, + output logic replay_sent_hit_o, + output logic replay_not_ready_o, + + // Memory interface + input logic mem_req_write_ready_i, + output logic mem_req_write_valid_o, + output hpdcache_mem_req_t mem_req_write_o, + + input logic mem_req_write_data_ready_i, + output logic mem_req_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_write_data_o, + + output logic mem_resp_write_ready_o, + input logic mem_resp_write_valid_i, + input hpdcache_mem_resp_w_t mem_resp_write_i +); + // }}} + + // Internal signals + // {{{ + wbuf_addr_t send_addr; + wbuf_dir_ptr_t send_id; + logic send_uc; + wbuf_addr_t send_data_tag; + wbuf_data_buf_t send_data; + wbuf_be_buf_t send_be; + wbuf_dir_ptr_t ack_id; + logic ack_error; + // }}} + + // Wrapped write buffer + // {{{ + hpdcache_wbuf #( + .WBUF_DIR_ENTRIES (HPDCACHE_WBUF_DIR_ENTRIES), + .WBUF_DATA_ENTRIES (HPDCACHE_WBUF_DATA_ENTRIES), + .WBUF_WORD_WIDTH (HPDCACHE_REQ_DATA_WIDTH), + .WBUF_WORDS (HPDCACHE_WBUF_WORDS), + .WBUF_PA_WIDTH (HPDCACHE_PA_WIDTH), + .WBUF_TIMECNT_MAX ((2**HPDCACHE_WBUF_TIMECNT_WIDTH) - 1), + .WBUF_READ_MATCH_WIDTH (HPDCACHE_NLINE_WIDTH) + ) hpdcache_wbuf_i ( + .clk_i, + .rst_ni, + .empty_o, + .full_o, + .flush_all_i, + .cfg_threshold_i, + .cfg_reset_timecnt_on_write_i, + .cfg_sequential_waw_i, + .cfg_inhibit_write_coalescing_i, + .write_i, + .write_ready_o, + .write_addr_i, + .write_data_i, + .write_be_i, + .write_uc_i, + .read_addr_i, + .read_hit_o, + .read_flush_hit_i, + .replay_addr_i, + .replay_is_read_i, + .replay_open_hit_o, + .replay_pend_hit_o, + .replay_sent_hit_o, + .replay_not_ready_o, + .send_meta_ready_i (mem_req_write_ready_i), + .send_meta_valid_o (mem_req_write_valid_o), + .send_addr_o (send_addr), + .send_id_o (send_id), + .send_uc_o (send_uc), + .send_data_ready_i (mem_req_write_data_ready_i), + .send_data_valid_o (mem_req_write_data_valid_o), + .send_data_tag_o (send_data_tag), + .send_data_o (send_data), + .send_be_o (send_be), + .ack_i (mem_resp_write_valid_i), + .ack_id_i (ack_id), + .ack_error_i (ack_error) + ); + // }}} + + // Memory interface + // {{{ + assign mem_req_write_o.mem_req_addr = send_addr, + 
mem_req_write_o.mem_req_len = 0, + mem_req_write_o.mem_req_size = get_hpdcache_mem_size(HPDCACHE_WBUF_DATA_WIDTH/8), + mem_req_write_o.mem_req_id = hpdcache_mem_id_t'(send_id), + mem_req_write_o.mem_req_command = HPDCACHE_MEM_WRITE, + mem_req_write_o.mem_req_atomic = HPDCACHE_MEM_ATOMIC_ADD, + mem_req_write_o.mem_req_cacheable = ~send_uc; + + generate + localparam int unsigned WBUF_MEM_DATA_RATIO = HPDcacheMemDataWidth/HPDCACHE_WBUF_DATA_WIDTH; + localparam int unsigned WBUF_MEM_DATA_WORD_INDEX_WIDTH = $clog2(WBUF_MEM_DATA_RATIO); + + assign mem_req_write_data_o.mem_req_w_last = 1'b1; + + if (WBUF_MEM_DATA_RATIO > 1) + begin : wbuf_data_upsizing_gen + logic [HPDCACHE_WBUF_DATA_WIDTH/8-1:0][WBUF_MEM_DATA_RATIO-1:0] mem_req_be; + + // demux send BE + hpdcache_demux #( + .NOUTPUT (WBUF_MEM_DATA_RATIO), + .DATA_WIDTH (HPDCACHE_WBUF_DATA_WIDTH/8), + .ONE_HOT_SEL (1'b0) + ) mem_write_be_demux_i ( + .data_i (send_be), + .sel_i (send_data_tag[0 +: WBUF_MEM_DATA_WORD_INDEX_WIDTH]), + .data_o (mem_req_be) + ); + + assign mem_req_write_data_o.mem_req_w_data = {WBUF_MEM_DATA_RATIO{send_data}}, + mem_req_write_data_o.mem_req_w_be = mem_req_be; + + end else if (WBUF_MEM_DATA_RATIO == 1) + begin : wbuf_data_forwarding_gen + assign mem_req_write_data_o.mem_req_w_data = send_data, + mem_req_write_data_o.mem_req_w_be = send_be; + end + + // Assertions + // {{{ + // pragma translate_off + initial assert(WBUF_MEM_DATA_RATIO > 0) else + $error($sformatf("WBUF: data width of mem interface (%d) shall be g.e. to wbuf data width(%d)", + HPDcacheMemDataWidth, HPDCACHE_WBUF_DATA_WIDTH)); + // pragma translate_on + // }}} + endgenerate + + assign mem_resp_write_ready_o = 1'b1, + ack_id = mem_resp_write_i.mem_resp_w_id[0 +: HPDCACHE_WBUF_DIR_PTR_WIDTH], + ack_error = (mem_resp_write_i.mem_resp_w_error != HPDCACHE_MEM_RESP_OK); + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial assert (HPDCACHE_WBUF_DIR_PTR_WIDTH <= HPDcacheMemIdWidth) else + $fatal("HPDcacheMemIdWidth is not wide enough to fit all possible write buffer transactions"); + // pragma translate_on + // }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride.sv new file mode 100644 index 00000000000..dfef92daac2 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride.sv @@ -0,0 +1,374 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Riccardo Alidori, Cesar Fuguet + * Maintainers(s): Cesar Fuguet + * Creation Date : June, 2021 + * Description : HPDcache Linear Hardware Memory Prefetcher. 
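 *                  The engine snoops a programmed cache-line address and, when
 *                  a demand access matches it, emits CMO-prefetch requests that
 *                  walk memory with a programmable stride. A rough usage sketch
 *                  (the CSR values below are illustrative, not a verified
 *                  setup):
 *
 *                      csr_base_i.base_cline    = 'h0_4000  // line to snoop on
 *                      csr_base_i.enable        = 1'b1
 *                      csr_param_i.stride       = '0        // stride+1 = next line
 *                      csr_param_i.nlines       = 4         // lines per block
 *                      csr_param_i.nblocks      = 1
 *                      csr_throttle_i.ninflight = 2         // max outstanding reqs
 *
 *                  Once armed, a snoop hit on line 'h0_4000 triggers prefetches
 *                  of the following consecutive lines, with at most two of them
 *                  in flight at any time.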
+ * History : + */ +module hwpf_stride +import hwpf_stride_pkg::*; +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter int CACHE_LINE_BYTES = 64 +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // CSR + input logic csr_base_set_i, + input hwpf_stride_base_t csr_base_i, + input logic csr_param_set_i, + input hwpf_stride_param_t csr_param_i, + input logic csr_throttle_set_i, + input hwpf_stride_throttle_t csr_throttle_i, + + output hwpf_stride_base_t csr_base_o, + output hwpf_stride_param_t csr_param_o, + output hwpf_stride_throttle_t csr_throttle_o, + + // If high, the prefetcher is enabled and active + output logic busy_o, + + // Snooping + // Address to snoop on requests ports + output hpdcache_nline_t snoop_nline_o, + // If set to one, the snoop address matched one of the requests + input snoop_match_i, + + // D-Cache interface + output logic hpdcache_req_valid_o, + input logic hpdcache_req_ready_i, + output hpdcache_req_t hpdcache_req_o, + input logic hpdcache_rsp_valid_i, + input hpdcache_rsp_t hpdcache_rsp_i +); +// }}} + + import hpdcache_pkg::hpdcache_req_addr_t; + + // Definition of constants + // {{{ + localparam int STRIDE_WIDTH = $bits(csr_param_i.stride); + localparam int NBLOCKS_WIDTH = $bits(csr_param_i.nblocks); + localparam int NLINES_WIDTH = $bits(csr_param_i.nlines); + localparam int NWAIT_WIDTH = $bits(csr_throttle_i.nwait); + localparam int INFLIGHT_WIDTH = $bits(csr_throttle_i.ninflight); + localparam int NLINES_CNT_WIDTH = NLINES_WIDTH; + // }}} + + // Internal registers and signals + // {{{ + // FSM + enum { + IDLE, + SNOOP, + SEND_REQ, + WAIT, + DONE, + ABORT + } state_d, state_q; + + logic [NBLOCKS_WIDTH-1:0] nblocks_cnt_d, nblocks_cnt_q; + logic [NLINES_CNT_WIDTH-1:0] nlines_cnt_d, nlines_cnt_q; + logic [NWAIT_WIDTH-1:0] nwait_cnt_d, nwait_cnt_q; + logic [INFLIGHT_WIDTH-1:0] inflight_cnt_d, inflight_cnt_q; + logic inflight_inc, inflight_dec; + + hwpf_stride_base_t csr_base_q; + hwpf_stride_base_t shadow_base_q, shadow_base_d; + hwpf_stride_param_t csr_param_q; + hwpf_stride_param_t shadow_param_q, shadow_param_d; + hwpf_stride_throttle_t csr_throttle_q; + hwpf_stride_throttle_t shadow_throttle_q, shadow_throttle_d; + hpdcache_nline_t request_nline_q, request_nline_d; + + hpdcache_set_t hpdcache_req_set; + hpdcache_tag_t hpdcache_req_tag; + + logic csr_base_update; + hpdcache_nline_t increment_stride; + logic is_inflight_max; + + // Default assignment + assign increment_stride = hpdcache_nline_t'(shadow_param_q.stride) + 1'b1; + assign inflight_dec = hpdcache_rsp_valid_i; + assign snoop_nline_o = shadow_base_q.base_cline; + assign is_inflight_max = ( shadow_throttle_q.ninflight == '0 ) ? 
+ 1'b0 : ( inflight_cnt_q >= shadow_throttle_q.ninflight ); + assign csr_base_o = csr_base_q; + assign csr_param_o = csr_param_q; + assign csr_throttle_o = csr_throttle_q; + // }}} + + // Dcache outputs + // {{{ + assign hpdcache_req_set = request_nline_q[0 +: HPDCACHE_SET_WIDTH], + hpdcache_req_tag = request_nline_q[HPDCACHE_SET_WIDTH +: HPDCACHE_TAG_WIDTH]; + + assign hpdcache_req_o.addr_offset = { hpdcache_req_set, {HPDCACHE_OFFSET_WIDTH{1'b0}} }, + hpdcache_req_o.wdata = '0, + hpdcache_req_o.op = HPDCACHE_REQ_CMO, + hpdcache_req_o.be = '1, + hpdcache_req_o.size = HPDCACHE_REQ_CMO_PREFETCH, + hpdcache_req_o.sid = '0, // this is set when connecting to the dcache + hpdcache_req_o.tid = '0, // this is set by the wrapper of the prefetcher + hpdcache_req_o.need_rsp = 1'b1, + hpdcache_req_o.phys_indexed = 1'b1, + hpdcache_req_o.addr_tag = hpdcache_req_tag, + hpdcache_req_o.pma.uncacheable = 1'b0, + hpdcache_req_o.pma.io = 1'b0; + // }}} + + // Set state of internal registers + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin + if (!rst_ni) begin + csr_base_q <= '0; + csr_param_q <= '0; + shadow_base_q <= '0; + shadow_param_q <= '0; + shadow_throttle_q <= '0; + request_nline_q <= '0; + state_q <= IDLE; + end else begin + if (csr_base_set_i) csr_base_q <= csr_base_i; + else if (csr_base_update) csr_base_q <= shadow_base_d; + if (csr_param_set_i) csr_param_q <= csr_param_i; + if (csr_throttle_set_i) csr_throttle_q <= csr_throttle_i; + shadow_base_q <= shadow_base_d; + shadow_param_q <= shadow_param_d; + shadow_throttle_q <= shadow_throttle_d; + request_nline_q <= request_nline_d; + state_q <= state_d; + end + end + // }}} + + // Update internal counters + // {{{ + always_comb begin : inflight_cnt + inflight_cnt_d = inflight_cnt_q; + + // Every time we send a dcache request, increment the counter + if ( inflight_inc ) begin + inflight_cnt_d++; + end + + // Every time we got a response from the cache, decrement the counter + if ( inflight_dec && ( inflight_cnt_q > 0 )) begin + inflight_cnt_d--; + end + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + nblocks_cnt_q <= '0; + nlines_cnt_q <= '0; + nwait_cnt_q <= '0; + inflight_cnt_q <= '0; + end else begin + nblocks_cnt_q <= nblocks_cnt_d; + nlines_cnt_q <= nlines_cnt_d; + nwait_cnt_q <= nwait_cnt_d; + inflight_cnt_q <= inflight_cnt_d; + end + end + // }}} + + // FSM + // {{{ + always_comb begin : fsm_control + // default assignments + hpdcache_req_valid_o = 1'b0; + nblocks_cnt_d = nblocks_cnt_q; + nlines_cnt_d = nlines_cnt_q; + nwait_cnt_d = nwait_cnt_q; + inflight_inc = 1'b0; + busy_o = 1'b0; + csr_base_update = 1'b0; + + shadow_base_d = shadow_base_q; + shadow_param_d = shadow_param_q; + shadow_throttle_d = shadow_throttle_q; + request_nline_d = request_nline_q; + state_d = state_q; + + case ( state_q ) + + IDLE: begin + // If enabled, go snooping the dcache ports + if ( csr_base_q.enable ) begin + shadow_base_d = csr_base_q; + if (( csr_param_q.nlines > 0 ) || ( csr_param_q.nblocks > 0 )) begin + shadow_param_d = csr_param_q; + shadow_throttle_d = csr_throttle_q; + state_d = SNOOP; + end else begin + // no prefetch needed, disarm immediately + shadow_base_d.enable = 1'b0; + csr_base_update = 1'b1; + end + end + end + + + SNOOP: begin + if ( csr_base_q.enable ) begin + // If a snooper matched an address, send the request + if ( snoop_match_i ) begin + state_d = SEND_REQ; + + if ( shadow_param_q.nlines == 0 ) begin + // skip the first block + request_nline_d = shadow_base_q.base_cline + + 
hpdcache_nline_t'(increment_stride); + nblocks_cnt_d = ( shadow_param_q.nblocks > 0 ) ? + shadow_param_q.nblocks - 1 : 0; + nlines_cnt_d = 0; + + // update the base cacheline to the first one of the next block + shadow_base_d.base_cline = request_nline_d; + end else begin + // skip the first cacheline (of the first block) + request_nline_d = shadow_base_q.base_cline + 1'b1; + nblocks_cnt_d = shadow_param_q.nblocks; + nlines_cnt_d = shadow_param_q.nlines - 1; + end + end + end else begin + state_d = IDLE; + end + end + + + SEND_REQ: begin + busy_o = 1'b1; + + // make the prefetch request to memory + hpdcache_req_valid_o = 1'b1; + + // we've got a grant, so we can move to the next request + if ( hpdcache_req_ready_i ) begin + inflight_inc = 1'b1; + + if ( nlines_cnt_q == 0 ) begin + // go to the first cacheline of the next block + request_nline_d = shadow_base_q.base_cline + + hpdcache_nline_t'(increment_stride); + nblocks_cnt_d = ( nblocks_cnt_q > 0 ) ? nblocks_cnt_q - 1 : 0; + nlines_cnt_d = shadow_param_q.nlines; + + // update the base cacheline to the first one of the next block + shadow_base_d.base_cline = request_nline_d; + end else begin + // go to the next cacheline (within the same block) + request_nline_d = request_nline_q + 1'b1; + nlines_cnt_d = nlines_cnt_q - 1; + end + + // if the NWAIT parameter is equal 0, we can issue a request every cycle + if (( nblocks_cnt_q == 0 ) && ( nlines_cnt_q == 0 )) begin + state_d = DONE; + end else if ( shadow_throttle_q.nwait == 0 ) begin + // Wait if the number of inflight requests is greater than + // the maximum indicated. Otherwise, send the next request + state_d = is_inflight_max ? WAIT : SEND_REQ; + end else begin + // Wait the indicated cycles before sending the next request + nwait_cnt_d = shadow_throttle_q.nwait; + state_d = WAIT; + end + + if ( !csr_base_q.enable ) state_d = ABORT; + end + end + + + WAIT: begin + // Wait until: + // - the indicated number of wait cycles between requests is reached (nwait) + // - the number of inflight requests is below the indicated maximum (ninflight) + busy_o = 1'b1; + if ( csr_base_q.enable ) begin + if ( !is_inflight_max && ( nwait_cnt_q == 0 )) begin + state_d = SEND_REQ; + end + + if ( nwait_cnt_q > 0 ) begin + nwait_cnt_d = nwait_cnt_q - 1; + end + end else begin + state_d = ABORT; + end + end + + + DONE: begin + busy_o = 1'b1; + if ( csr_base_q.enable ) begin + if (( inflight_cnt_q == 0 ) && !is_inflight_max && ( nwait_cnt_q == 0 )) begin + // Copy back shadow base register into the user visible one + csr_base_update = 1'b1; + + // Check the rearm bit + if ( shadow_base_q.rearm ) begin + state_d = SNOOP; + end else begin + state_d = IDLE; + + // disarm the prefetcher + shadow_base_d.enable = 1'b0; + end + + // Check the cycle bit + if ( shadow_base_q.cycle ) begin + // restore the base address + shadow_base_d.base_cline = csr_base_q.base_cline; + end + end + + if ( nwait_cnt_q > 0 ) begin + nwait_cnt_d = nwait_cnt_q - 1; + end + end else begin + state_d = ABORT; + end + end + + ABORT: begin + busy_o = 1'b1; + if ( inflight_cnt_q == 0 ) begin + state_d = IDLE; + end + end + endcase + end + // }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv new file mode 100644 index 00000000000..1aa9df485ff --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_arb.sv @@ -0,0 +1,117 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux 
Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Author(s) : Riccardo Alidori, Cesar Fuguet + * Creation Date : June, 2021 + * Description : Hw prefetchers arbiter + * History : + */ +module hwpf_stride_arb +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter NUM_HW_PREFETCH = 4 +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Dcache input interface + input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid_i, + output logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_ready_o, + input hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_req_i, + output logic [NUM_HW_PREFETCH-1:0] hwpf_stride_rsp_valid_o, + output hpdcache_rsp_t [NUM_HW_PREFETCH-1:0] hwpf_stride_rsp_o, // Not used + + // Dcache output interface + output logic hpdcache_req_valid_o, + input logic hpdcache_req_ready_i, + output hpdcache_req_t hpdcache_req_o, + input logic hpdcache_rsp_valid_i, + input hpdcache_rsp_t hpdcache_rsp_i // Not used +); +// }}} + + // Internal signals + // {{{ + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid; + hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_req; + logic [NUM_HW_PREFETCH-1:0] arb_req_gnt; + // }}} + + // Requesters arbiter + // {{{ + // Pack request ports + genvar gen_i; + generate + for (gen_i = 0; gen_i < NUM_HW_PREFETCH; gen_i++) begin : gen_hwpf_stride_req + assign hwpf_stride_req_ready_o[gen_i] = arb_req_gnt[gen_i] & hpdcache_req_ready_i, + hwpf_stride_req_valid[gen_i] = hwpf_stride_req_valid_i[gen_i], + hwpf_stride_req[gen_i] = hwpf_stride_req_i[gen_i]; + end + endgenerate + + // Arbiter + hpdcache_rrarb #( + .N (NUM_HW_PREFETCH) + ) hwpf_stride_req_arbiter_i ( + .clk_i, + .rst_ni, + .req_i (hwpf_stride_req_valid), + .gnt_o (arb_req_gnt), + .ready_i (hpdcache_req_ready_i) + ); + + // Request Multiplexor + hpdcache_mux #( + .NINPUT (NUM_HW_PREFETCH), + .DATA_WIDTH ($bits(hpdcache_req_t)), + .ONE_HOT_SEL (1'b1) + ) hwpf_stride_req_mux_i ( + .data_i (hwpf_stride_req), + .sel_i (arb_req_gnt), + .data_o (hpdcache_req_o) + ); + + assign hpdcache_req_valid_o = |arb_req_gnt; + // }}} + + // Response demultiplexor + // {{{ + // As the HW prefetcher does not need the TID field in the request, we + // use it to transport the identifier of the specific hardware + // prefetcher. + // This way we share the same SID for all HW prefetchers. Using + // different SIDs means that we need different ports to the cache and + // we actually want to reduce those. 
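+ // Consequently, the response is broadcast to all prefetchers and only the
+ // engine whose index matches the TID of the incoming response sees its
+ // valid bit asserted.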
+ always_comb + begin : resp_demux + for (int unsigned i = 0; i < NUM_HW_PREFETCH; i++) begin + hwpf_stride_rsp_valid_o[i] = hpdcache_rsp_valid_i && (i == int'(hpdcache_rsp_i.tid)); + hwpf_stride_rsp_o[i] = hpdcache_rsp_i; + end + end + // }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv new file mode 100644 index 00000000000..3470b78620d --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_pkg.sv @@ -0,0 +1,68 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : January, 2023 + * Description : High-Performance, Data-cache (HPDcache) HW memory + * prefetcher package + * History : + */ +package hwpf_stride_pkg; + // Base address configuration register of the hardware memory prefetcher + // {{{ + typedef struct packed { + logic [63:6] base_cline; + logic [5:3] unused; + logic cycle; + logic rearm; + logic enable; + } hwpf_stride_base_t; + // }}} + + // Parameters configuration register of the hardware memory prefetcher + // {{{ + typedef struct packed { + logic [63:48] nblocks; + logic [47:32] nlines; + logic [31:0] stride; + } hwpf_stride_param_t; + // }}} + + // Throttle configuration register of the hardware memory prefetcher + // {{{ + typedef struct packed { + logic [31:16] ninflight; + logic [15:0] nwait; + } hwpf_stride_throttle_t; + // }}} + + // Status register of the hardware memory prefetcher + // {{{ + typedef struct packed { + logic [63:48] unused1; + logic [47:32] busy; + logic free; + logic [30:20] unused0; + logic [19:16] free_index; + logic [15:0] enabled; + } hwpf_stride_status_t; + // }}} + +endpackage diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv new file mode 100644 index 00000000000..ba995b50621 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_snooper.sv @@ -0,0 +1,38 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Riccardo Alidori, Cesar Fuguet + * Creation Date : June, 2021 + * Description : Snooper used by the hardware memory prefetcher + * History : + */ +module hwpf_stride_snooper +import hpdcache_pkg::*; +( + input logic en_i, // Snooper enable bit. + input hpdcache_nline_t base_nline_i, // Address to check + input hpdcache_nline_t snoop_addr_i, // Input address to snoop + output snoop_match_o // If high, the Snoopers matched the snoop_address +); + + // The snooper match if enabled and the two addresses are equal + assign snoop_match_o = en_i && ( base_nline_i == snoop_addr_i ); + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv new file mode 100644 index 00000000000..fa1cfa4f9ee --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/hwpf_stride/hwpf_stride_wrapper.sv @@ -0,0 +1,265 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Riccardo Alidori, Cesar Fuguet + * Creation Date : June, 2021 + * Description : Linear Hardware Memory Prefetcher wrapper. 
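+ *                   It instantiates NUM_HW_PREFETCH hwpf_stride engines,
+ *                   matches the snooped request addresses against each
+ *                   engine's base cache line, arbitrates the engines'
+ *                   requests onto a single HPDcache port (hwpf_stride_arb),
+ *                   and exposes a combined status word.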
+ * History : + */ +module hwpf_stride_wrapper +import hwpf_stride_pkg::*; +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter NUM_HW_PREFETCH = 4, + parameter NUM_SNOOP_PORTS = 1 +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // CSR + // {{{ + input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_base_set_i, + input hwpf_stride_base_t [NUM_HW_PREFETCH-1:0] hwpf_stride_base_i, + output hwpf_stride_base_t [NUM_HW_PREFETCH-1:0] hwpf_stride_base_o, + + input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_param_set_i, + input hwpf_stride_param_t [NUM_HW_PREFETCH-1:0] hwpf_stride_param_i, + output hwpf_stride_param_t [NUM_HW_PREFETCH-1:0] hwpf_stride_param_o, + + input logic [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_set_i, + input hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_i, + output hwpf_stride_throttle_t [NUM_HW_PREFETCH-1:0] hwpf_stride_throttle_o, + + output hwpf_stride_status_t hwpf_stride_status_o, + // }}} + + // Snooping + // {{{ + input logic [NUM_SNOOP_PORTS-1:0] snoop_valid_i, + input logic [NUM_SNOOP_PORTS-1:0] snoop_abort_i, + input hpdcache_req_offset_t [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_i, + input hpdcache_tag_t [NUM_SNOOP_PORTS-1:0] snoop_addr_tag_i, + input logic [NUM_SNOOP_PORTS-1:0] snoop_phys_indexed_i, + // }}} + + // Dcache interface + // {{{ + input hpdcache_req_sid_t hpdcache_req_sid_i, + output logic hpdcache_req_valid_o, + input logic hpdcache_req_ready_i, + output hpdcache_req_t hpdcache_req_o, + output logic hpdcache_req_abort_o, + output hpdcache_tag_t hpdcache_req_tag_o, + output hpdcache_pma_t hpdcache_req_pma_o, + input logic hpdcache_rsp_valid_i, + input hpdcache_rsp_t hpdcache_rsp_i + // }}} +); +// }}} + + // Internal registers + // {{{ + logic [NUM_SNOOP_PORTS-1:0] snoop_valid_q; + hpdcache_req_offset_t [NUM_SNOOP_PORTS-1:0] snoop_addr_offset_q; + // }}} + + // Internal signals + // {{{ + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_enable; + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_free; + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_status_busy; + logic [3:0] hwpf_stride_status_free_idx; + + hpdcache_nline_t [NUM_HW_PREFETCH-1:0] hwpf_snoop_nline; + logic [NUM_HW_PREFETCH-1:0] hwpf_snoop_match; + + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_valid; + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_req_ready; + hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_req; + + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_valid; + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req_ready; + hpdcache_req_t [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_req; + logic [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp_valid; + hpdcache_rsp_t [NUM_HW_PREFETCH-1:0] hwpf_stride_arb_in_rsp; + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial + begin + max_hwpf_stride_assert: assert (NUM_HW_PREFETCH <= 16) else + $error("hwpf_stride: maximum number of HW prefetchers is 16"); + end + // pragma translate_on + // }}} + + // Compute the status information + // {{{ + always_comb begin: hwpf_stride_priority_encoder + hwpf_stride_status_free_idx = '0; + for (int unsigned i = 0; i < NUM_HW_PREFETCH; i++) begin + if (hwpf_stride_free[i]) begin + hwpf_stride_status_free_idx = i; + break; + end + end + end + + // Free flag of engines + assign hwpf_stride_free = ~(hwpf_stride_enable | hwpf_stride_status_busy); + // Busy flags + assign hwpf_stride_status_o[63:32] = {{32-NUM_HW_PREFETCH{1'b0}}, hwpf_stride_status_busy}; + // Global free flag + assign hwpf_stride_status_o[31] = |hwpf_stride_free; + // Free Index + assign 
hwpf_stride_status_o[30:16] = {11'b0, hwpf_stride_status_free_idx}; + // Enable flags + assign hwpf_stride_status_o[15:0] = {{16-NUM_HW_PREFETCH{1'b0}}, hwpf_stride_enable}; + // }}} + + // Hardware prefetcher engines + // {{{ + generate + for (genvar j = 0; j < NUM_SNOOP_PORTS; j++) begin + always_ff @(posedge clk_i or negedge rst_ni) + begin : snoop_ff + if (!rst_ni) begin + snoop_valid_q[j] <= 1'b0; + snoop_addr_offset_q[j] <= '0; + end else begin + if (snoop_phys_indexed_i[j]) begin + snoop_valid_q[j] <= snoop_valid_i[j]; + snoop_addr_offset_q[j] <= snoop_addr_offset_i[j]; + end + end + end + end + + for (genvar i = 0; i < NUM_HW_PREFETCH; i++) begin + assign hwpf_stride_enable[i] = hwpf_stride_base_o[i].enable; + + // Compute snoop match signals + // {{{ + always_comb + begin : snoop_comb + hwpf_snoop_match[i] = 1'b0; + for (int j = 0; j < NUM_SNOOP_PORTS; j++) begin + automatic logic snoop_valid; + automatic hpdcache_req_offset_t snoop_offset; + automatic hpdcache_nline_t snoop_nline; + + if (snoop_phys_indexed_i[j]) begin + snoop_valid = snoop_valid_i[j]; + snoop_offset = snoop_addr_offset_i[j]; + end else begin + snoop_valid = snoop_valid_q[j]; + snoop_offset = snoop_addr_offset_q[j]; + end + snoop_nline = {snoop_addr_tag_i[j], snoop_offset}; + hwpf_snoop_match[i] |= (snoop_valid && !snoop_abort_i[j] && + (hwpf_snoop_nline[i] == snoop_nline)); + end + end + // }}} + + hwpf_stride #( + .CACHE_LINE_BYTES ( HPDCACHE_CL_WIDTH/8 ) + ) hwpf_stride_i( + .clk_i, + .rst_ni, + + .csr_base_set_i ( hwpf_stride_base_set_i[i] ), + .csr_base_i ( hwpf_stride_base_i[i] ), + .csr_param_set_i ( hwpf_stride_param_set_i[i] ), + .csr_param_i ( hwpf_stride_param_i[i] ), + .csr_throttle_set_i ( hwpf_stride_throttle_set_i[i] ), + .csr_throttle_i ( hwpf_stride_throttle_i[i] ), + + .csr_base_o ( hwpf_stride_base_o[i] ), + .csr_param_o ( hwpf_stride_param_o[i] ), + .csr_throttle_o ( hwpf_stride_throttle_o[i] ), + + .busy_o ( hwpf_stride_status_busy[i] ), + + .snoop_nline_o ( hwpf_snoop_nline[i] ), + .snoop_match_i ( hwpf_snoop_match[i] ), + + .hpdcache_req_valid_o ( hwpf_stride_req_valid[i] ), + .hpdcache_req_ready_i ( hwpf_stride_req_ready[i] ), + .hpdcache_req_o ( hwpf_stride_req[i] ), + .hpdcache_rsp_valid_i ( hwpf_stride_arb_in_rsp_valid[i] ), + .hpdcache_rsp_i ( hwpf_stride_arb_in_rsp[i] ) + ); + + assign hwpf_stride_req_ready[i] = hwpf_stride_arb_in_req_ready[i], + hwpf_stride_arb_in_req_valid[i] = hwpf_stride_req_valid[i], + hwpf_stride_arb_in_req[i].addr_offset = hwpf_stride_req[i].addr_offset, + hwpf_stride_arb_in_req[i].wdata = hwpf_stride_req[i].wdata, + hwpf_stride_arb_in_req[i].op = hwpf_stride_req[i].op, + hwpf_stride_arb_in_req[i].be = hwpf_stride_req[i].be, + hwpf_stride_arb_in_req[i].size = hwpf_stride_req[i].size, + hwpf_stride_arb_in_req[i].sid = hpdcache_req_sid_i, + hwpf_stride_arb_in_req[i].tid = hpdcache_req_tid_t'(i), + hwpf_stride_arb_in_req[i].need_rsp = hwpf_stride_req[i].need_rsp, + hwpf_stride_arb_in_req[i].phys_indexed = hwpf_stride_req[i].phys_indexed, + hwpf_stride_arb_in_req[i].addr_tag = '0, + hwpf_stride_arb_in_req[i].pma = '0; + end + endgenerate + // }}} + + // Hardware prefetcher arbiter betweem engines + // {{{ + hwpf_stride_arb #( + .NUM_HW_PREFETCH ( NUM_HW_PREFETCH ) + ) hwpf_stride_arb_i ( + .clk_i, + .rst_ni, + + // DCache input interface + .hwpf_stride_req_valid_i ( hwpf_stride_arb_in_req_valid ), + .hwpf_stride_req_ready_o ( hwpf_stride_arb_in_req_ready ), + .hwpf_stride_req_i ( hwpf_stride_arb_in_req ), + .hwpf_stride_rsp_valid_o ( hwpf_stride_arb_in_rsp_valid ), 
+ .hwpf_stride_rsp_o ( hwpf_stride_arb_in_rsp ), + + // DCache output interface + .hpdcache_req_valid_o, + .hpdcache_req_ready_i, + .hpdcache_req_o, + .hpdcache_rsp_valid_i, + .hpdcache_rsp_i + ); + + assign hpdcache_req_abort_o = 1'b0, // unused on physically indexed requests + hpdcache_req_tag_o = '0, // unused on physically indexed requests + hpdcache_req_pma_o = '0; // unused on physically indexed requests + // }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_cmo_if_adapter.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_cmo_if_adapter.sv new file mode 100644 index 00000000000..a487006c6c6 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_cmo_if_adapter.sv @@ -0,0 +1,186 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Interface adapter for the CMO interface of the CVA6 core + * History : + */ +module cva6_hpdcache_cmo_if_adapter +import hpdcache_pkg::*; + +// Parameters +// {{{ +#( + parameter type cmo_req_t = logic, + parameter type cmo_rsp_t = logic +) +// }}} + +// Ports +// {{{ +( + // Clock and active-low reset pins + input logic clk_i, + input logic rst_ni, + + // Port ID + input hpdcache_pkg::hpdcache_req_sid_t dcache_req_sid_i, + + // Request/response ports from/to the CVA6 core + input cmo_req_t cva6_cmo_req_i, + output cmo_rsp_t cva6_cmo_resp_o, + + // Request port to the L1 Dcache + output logic dcache_req_valid_o, + input logic dcache_req_ready_i, + output hpdcache_pkg::hpdcache_req_t dcache_req_o, + + // Response port from the L1 Dcache + input logic dcache_rsp_valid_i, + input hpdcache_pkg::hpdcache_rsp_t dcache_rsp_i +); +// }}} + + // Internal nets and registers + // {{{ + enum { + FORWARD_IDLE, + FORWARD_CMO, + FORWARD_CMO_ACK + } forward_state_q, forward_state_d; + + logic forward_cmo; + hpdcache_pkg::hpdcache_req_t dcache_req_cmo; + logic [ariane_pkg::TRANS_ID_BITS-1:0] cmo_tid_q, cmo_tid_d; + logic cmo_ack; + logic stall; + // }}} + + // Request forwarding + // {{{ + always_comb + begin : req_forward_comb + forward_state_d = forward_state_q; + forward_cmo = 1'b0; + cmo_tid_d = cmo_tid_q; + cmo_ack = 1'b0; + stall = 1'b0; + + case (forward_state_q) + FORWARD_IDLE: begin + if (cva6_cmo_req_i.req) begin + stall = ~dcache_req_ready_i; + forward_cmo = 1'b1; + cmo_tid_d = cva6_cmo_req_i.trans_id; + if (!dcache_req_ready_i) begin + forward_state_d = FORWARD_CMO; + end else begin + forward_state_d = FORWARD_CMO_ACK; + end + end + end + + FORWARD_CMO: begin + stall = ~dcache_req_ready_i; + forward_cmo = 1'b1; + if (dcache_req_ready_i) begin + forward_state_d = FORWARD_CMO_ACK; + end + end + + FORWARD_CMO_ACK: begin + stall = 1'b1; + cmo_ack = 1'b1; + if 
(cva6_cmo_req_i.req) begin + stall = ~dcache_req_ready_i; + forward_cmo = 1'b1; + cmo_tid_d = cva6_cmo_req_i.trans_id; + if (!dcache_req_ready_i) begin + forward_state_d = FORWARD_CMO; + end else begin + forward_state_d = FORWARD_CMO_ACK; + end + end else begin + forward_state_d = FORWARD_IDLE; + end + end + endcase + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : forward_ff + if (!rst_ni) begin + forward_state_q <= FORWARD_IDLE; + cmo_tid_q <= '0; + end else begin + forward_state_q <= forward_state_d; + cmo_tid_q <= cmo_tid_d; + end + end + + // CMO request + // {{{ + always_comb + begin : cmo_req + dcache_req_cmo.addr = hpdcache_req_addr_t'(cva6_cmo_req_i.address); + dcache_req_cmo.need_rsp = 1'b0; + dcache_req_cmo.uncacheable = 1'b0; + dcache_req_cmo.sid = dcache_req_sid_i; + dcache_req_cmo.tid = cva6_cmo_req_i.trans_id; + dcache_req_cmo.wdata = '0; + dcache_req_cmo.be = '0; + dcache_req_cmo.op = HPDCACHE_REQ_CMO; + dcache_req_cmo.size = '0; + case (cva6_cmo_req_i.cmo_op) + ariane_pkg::CMO_CLEAN, + ariane_pkg::CMO_FLUSH, + ariane_pkg::CMO_ZERO: begin + // FIXME + end + ariane_pkg::CMO_INVAL: begin + dcache_req_cmo.size = HPDCACHE_REQ_CMO_INVAL_NLINE; + end + ariane_pkg::CMO_PREFETCH_R, + ariane_pkg::CMO_PREFETCH_W: begin + dcache_req_cmo.size = HPDCACHE_REQ_CMO_PREFETCH; + end + ariane_pkg::CMO_CLEAN_ALL, + ariane_pkg::CMO_FLUSH_ALL: begin + end + ariane_pkg::CMO_INVAL_ALL: begin + dcache_req_cmo.size = HPDCACHE_REQ_CMO_INVAL_ALL; + end + endcase + end + // }}} + + assign dcache_req_valid_o = forward_cmo, + dcache_req_o = dcache_req_cmo, + cva6_cmo_resp_o.req_ready = ~stall; + // }}} + + // Response forwarding + // {{{ + assign cva6_cmo_resp_o.ack = cmo_ack, + cva6_cmo_resp_o.trans_id = cmo_tid_q; + // }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_if_adapter.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_if_adapter.sv new file mode 100644 index 00000000000..dd298efed4d --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_if_adapter.sv @@ -0,0 +1,217 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Interface adapter for the CVA6 core + * History : + */ +module cva6_hpdcache_if_adapter +import hpdcache_pkg::*; + +// Parameters +// {{{ +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter bit is_load_port = 1'b1 +) +// }}} + +// Ports +// {{{ +( + // Clock and active-low reset pins + input logic clk_i, + input logic rst_ni, + + // Port ID + input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i, + + // Request/response ports from/to the CVA6 core + input ariane_pkg::dcache_req_i_t cva6_req_i, + output ariane_pkg::dcache_req_o_t cva6_req_o, + input ariane_pkg::amo_req_t cva6_amo_req_i, + output ariane_pkg::amo_resp_t cva6_amo_resp_o, + + // Request port to the L1 Dcache + output logic hpdcache_req_valid_o, + input logic hpdcache_req_ready_i, + output hpdcache_pkg::hpdcache_req_t hpdcache_req_o, + output logic hpdcache_req_abort_o, + output hpdcache_pkg::hpdcache_tag_t hpdcache_req_tag_o, + output hpdcache_pkg::hpdcache_pma_t hpdcache_req_pma_o, + + // Response port from the L1 Dcache + input logic hpdcache_rsp_valid_i, + input hpdcache_pkg::hpdcache_rsp_t hpdcache_rsp_i +); +// }}} + + // Internal nets and registers + // {{{ + logic forward_store, forward_amo; + logic hpdcache_req_is_uncacheable; + // }}} + + // Request forwarding + // {{{ + generate + // LOAD request + // {{{ + if (is_load_port == 1'b1) begin : load_port_gen + assign hpdcache_req_is_uncacheable = + !config_pkg::is_inside_cacheable_regions(CVA6Cfg, + {{64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}} + , cva6_req_i.address_tag + , {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}}); + + // Request forwarding + assign hpdcache_req_valid_o = cva6_req_i.data_req, + hpdcache_req_o.addr_offset = cva6_req_i.address_index, + hpdcache_req_o.wdata = '0, + hpdcache_req_o.op = hpdcache_pkg::HPDCACHE_REQ_LOAD, + hpdcache_req_o.be = cva6_req_i.data_be, + hpdcache_req_o.size = cva6_req_i.data_size, + hpdcache_req_o.sid = hpdcache_req_sid_i, + hpdcache_req_o.tid = cva6_req_i.data_id, + hpdcache_req_o.need_rsp = 1'b1, + hpdcache_req_o.phys_indexed = 1'b0, + hpdcache_req_o.addr_tag = '0, // unused on virtually indexed request + hpdcache_req_o.pma = '0; // unused on virtually indexed request + + assign hpdcache_req_abort_o = cva6_req_i.kill_req, + hpdcache_req_tag_o = cva6_req_i.address_tag, + hpdcache_req_pma_o.uncacheable = hpdcache_req_is_uncacheable, + hpdcache_req_pma_o.io = 1'b0; + + // Response forwarding + assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i, + cva6_req_o.data_rdata = hpdcache_rsp_i.rdata, + cva6_req_o.data_rid = hpdcache_rsp_i.tid, + cva6_req_o.data_gnt = hpdcache_req_ready_i; + end + // }}} + + // STORE/AMO request + // {{{ + else begin : store_amo_gen + hpdcache_req_addr_t amo_addr; + hpdcache_req_offset_t amo_addr_offset; + hpdcache_tag_t amo_tag; + logic amo_is_word, amo_is_word_hi; + hpdcache_req_data_t amo_data; + hpdcache_req_be_t amo_data_be; + hpdcache_req_op_t amo_op; + logic [31:0] amo_resp_word; + + // AMO logic + // {{{ + always_comb + begin : amo_op_comb + amo_addr = cva6_amo_req_i.operand_a; + amo_addr_offset = amo_addr[0 +: HPDCACHE_REQ_OFFSET_WIDTH]; + amo_tag = amo_addr[HPDCACHE_REQ_OFFSET_WIDTH +: HPDCACHE_TAG_WIDTH]; + amo_is_word = (cva6_amo_req_i.size == 2'b10); + amo_is_word_hi = cva6_amo_req_i.operand_a[2]; + + amo_data = amo_is_word ? {2{cva6_amo_req_i.operand_b[0 +: 32]}} + : cva6_amo_req_i.operand_b; + + amo_data_be = amo_is_word_hi ? 8'hf0 : + amo_is_word ? 
8'h0f : 8'hff; + + unique case(cva6_amo_req_i.amo_op) + ariane_pkg::AMO_LR: amo_op = HPDCACHE_REQ_AMO_LR; + ariane_pkg::AMO_SC: amo_op = HPDCACHE_REQ_AMO_SC; + ariane_pkg::AMO_SWAP: amo_op = HPDCACHE_REQ_AMO_SWAP; + ariane_pkg::AMO_ADD: amo_op = HPDCACHE_REQ_AMO_ADD; + ariane_pkg::AMO_AND: amo_op = HPDCACHE_REQ_AMO_AND; + ariane_pkg::AMO_OR: amo_op = HPDCACHE_REQ_AMO_OR; + ariane_pkg::AMO_XOR: amo_op = HPDCACHE_REQ_AMO_XOR; + ariane_pkg::AMO_MAX: amo_op = HPDCACHE_REQ_AMO_MAX; + ariane_pkg::AMO_MAXU: amo_op = HPDCACHE_REQ_AMO_MAXU; + ariane_pkg::AMO_MIN: amo_op = HPDCACHE_REQ_AMO_MIN; + ariane_pkg::AMO_MINU: amo_op = HPDCACHE_REQ_AMO_MINU; + default: amo_op = HPDCACHE_REQ_LOAD; + endcase + end + + assign amo_resp_word = amo_is_word_hi ? hpdcache_rsp_i.rdata[0][32 +: 32] + : hpdcache_rsp_i.rdata[0][0 +: 32]; + // }}} + + // Request forwarding + // {{{ + assign hpdcache_req_is_uncacheable = + !config_pkg::is_inside_cacheable_regions(CVA6Cfg, + {{64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}} + , hpdcache_req_o.addr_tag, {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}}); + + assign forward_store = cva6_req_i.data_req, + forward_amo = cva6_amo_req_i.req; + + assign hpdcache_req_valid_o = forward_store | forward_amo, + hpdcache_req_o.addr_offset = forward_amo ? amo_addr_offset + : cva6_req_i.address_index, + hpdcache_req_o.wdata = forward_amo ? amo_data + : cva6_req_i.data_wdata, + hpdcache_req_o.op = forward_amo ? amo_op + : hpdcache_pkg::HPDCACHE_REQ_STORE, + hpdcache_req_o.be = forward_amo ? amo_data_be + : cva6_req_i.data_be, + hpdcache_req_o.size = forward_amo ? cva6_amo_req_i.size + : cva6_req_i.data_size, + hpdcache_req_o.sid = hpdcache_req_sid_i, + hpdcache_req_o.tid = forward_amo ? '1 : '0, + hpdcache_req_o.need_rsp = forward_amo, + hpdcache_req_o.phys_indexed = 1'b1, + hpdcache_req_o.addr_tag = forward_amo ? amo_tag : cva6_req_i.address_tag, + hpdcache_req_o.pma.uncacheable = hpdcache_req_is_uncacheable, + hpdcache_req_o.pma.io = 1'b0, + hpdcache_req_abort_o = 1'b0, // unused on physically indexed requests + hpdcache_req_tag_o = '0, // unused on physically indexed requests + hpdcache_req_pma_o = '0; // unused on physically indexed requests + // }}} + + // Response forwarding + // {{{ + assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid != '1), + cva6_req_o.data_rdata = hpdcache_rsp_i.rdata, + cva6_req_o.data_rid = hpdcache_rsp_i.tid, + cva6_req_o.data_gnt = hpdcache_req_ready_i; + + assign cva6_amo_resp_o.ack = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid == '1), + cva6_amo_resp_o.result = amo_is_word ? 
{{32{amo_resp_word[31]}}, amo_resp_word} + : hpdcache_rsp_i.rdata[0][63:0]; + // }}} + end + // }}} + endgenerate + // }}} + + // Assertions + // {{{ + // pragma translate_off + forward_one_request_assert: assert property (@(posedge clk_i) + ($onehot0({forward_store, forward_amo}))) else + $error("Only one request shall be forwarded"); + // pragma translate_on + // }}} +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_params_pkg.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_params_pkg.sv new file mode 100644 index 00000000000..da7baa0bd19 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_params_pkg.sv @@ -0,0 +1,182 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2023 + * Description : Package with parameters for the HPDcache in a CVA6 platform + * History : + */ +package hpdcache_params_pkg; + // Imports from the CVA6 configuration package + // {{{ + import cva6_config_pkg::CVA6ConfigXlen; + import cva6_config_pkg::CVA6ConfigDcacheByteSize; + import cva6_config_pkg::CVA6ConfigDcacheSetAssoc; + import cva6_config_pkg::CVA6ConfigDcacheLineWidth; + import cva6_config_pkg::CVA6ConfigDcacheIdWidth; + import cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth; + // }}} + + // Definition of constants used only in this file + // {{{ + localparam int unsigned __BYTES_PER_WAY = + CVA6ConfigDcacheByteSize/CVA6ConfigDcacheSetAssoc; + + localparam int unsigned __BYTES_PER_CACHELINE = + CVA6ConfigDcacheLineWidth/8; + // }}} + + // Definition of global constants for the HPDcache data and directory + // {{{ + // HPDcache physical address width (in bits) + localparam int unsigned PARAM_PA_WIDTH = riscv::PLEN; + + // HPDcache number of sets + localparam int unsigned PARAM_SETS = __BYTES_PER_WAY/__BYTES_PER_CACHELINE; + + // HPDcache number of ways + localparam int unsigned PARAM_WAYS = CVA6ConfigDcacheSetAssoc; + + // HPDcache word width (bits) + localparam int unsigned PARAM_WORD_WIDTH = CVA6ConfigXlen; + + // HPDcache cache-line width (bits) + localparam int unsigned PARAM_CL_WORDS = CVA6ConfigDcacheLineWidth/PARAM_WORD_WIDTH; + + // HPDcache number of words in the request data channels (request and response) + `ifndef CONF_HPDCACHE_REQ_WORDS + `define CONF_HPDCACHE_REQ_WORDS 1 + `endif + localparam int unsigned PARAM_REQ_WORDS = `CONF_HPDCACHE_REQ_WORDS; + + // HPDcache request transaction ID width (bits) + localparam int unsigned PARAM_REQ_TRANS_ID_WIDTH = CVA6ConfigDcacheIdWidth; + + // HPDcache request source ID width (bits) + `ifndef CONF_HPDCACHE_REQ_SRC_ID_WIDTH + `define CONF_HPDCACHE_REQ_SRC_ID_WIDTH 3 + `endif + localparam int unsigned PARAM_REQ_SRC_ID_WIDTH = `CONF_HPDCACHE_REQ_SRC_ID_WIDTH; + + // HPDcache physically 
indexed + `ifndef CONF_HPDCACHE_PHYSICALLY_INDEXED + `define CONF_HPDCACHE_PHYSICALLY_INDEXED 1'b0 + `endif + localparam bit PARAM_PHYSICALLY_INDEXED = `CONF_HPDCACHE_PHYSICALLY_INDEXED; + // }}} + + // Definition of constants and types for HPDcache data memory + // {{{ + `ifndef CONF_HPDCACHE_DATA_WAYS_PER_RAM_WORD + `define CONF_HPDCACHE_DATA_WAYS_PER_RAM_WORD 128/PARAM_WORD_WIDTH + `endif + localparam int unsigned PARAM_DATA_WAYS_PER_RAM_WORD = `CONF_HPDCACHE_DATA_WAYS_PER_RAM_WORD; + + `ifndef CONF_HPDCACHE_DATA_SETS_PER_RAM + `define CONF_HPDCACHE_DATA_SETS_PER_RAM PARAM_SETS + `endif + localparam int unsigned PARAM_DATA_SETS_PER_RAM = `CONF_HPDCACHE_DATA_SETS_PER_RAM; + + // HPDcache DATA RAM implements write byte enable + `ifndef CONF_HPDCACHE_DATA_RAM_WBYTEENABLE + `define CONF_HPDCACHE_DATA_RAM_WBYTEENABLE 1'b0 + `endif + localparam bit PARAM_DATA_RAM_WBYTEENABLE = `CONF_HPDCACHE_DATA_RAM_WBYTEENABLE; + + // Define the number of memory contiguous words that can be accessed + // simultaneously from the cache. + // - This limits the maximum width for the data channel from requesters + // - This impacts the refill latency + `ifndef CONF_HPDCACHE_ACCESS_WORDS + `define CONF_HPDCACHE_ACCESS_WORDS PARAM_CL_WORDS/2 + `endif + localparam int unsigned PARAM_ACCESS_WORDS = `CONF_HPDCACHE_ACCESS_WORDS; + // }}} + + // Definition of constants and types for the Miss Status Holding Register (MSHR) + // {{{ + `ifndef CONF_HPDCACHE_MSHR_SETS + `define CONF_HPDCACHE_MSHR_SETS 2 + `endif + localparam int unsigned PARAM_MSHR_SETS = `CONF_HPDCACHE_MSHR_SETS; + + // HPDcache MSHR number of ways + `ifndef CONF_HPDCACHE_MSHR_WAYS + `define CONF_HPDCACHE_MSHR_WAYS 4 + `endif + localparam int unsigned PARAM_MSHR_WAYS = `CONF_HPDCACHE_MSHR_WAYS; + + // HPDcache MSHR number of ways in the same SRAM word + `ifndef CONF_HPDCACHE_MSHR_WAYS_PER_RAM_WORD + `define CONF_HPDCACHE_MSHR_WAYS_PER_RAM_WORD 2 + `endif + localparam int unsigned PARAM_MSHR_WAYS_PER_RAM_WORD = `CONF_HPDCACHE_MSHR_WAYS_PER_RAM_WORD; + + // HPDcache MSHR number of sets in the same SRAM + `ifndef CONF_HPDCACHE_MSHR_SETS_PER_RAM + `define CONF_HPDCACHE_MSHR_SETS_PER_RAM PARAM_MSHR_SETS + `endif + localparam int unsigned PARAM_MSHR_SETS_PER_RAM = `CONF_HPDCACHE_MSHR_SETS_PER_RAM; + + // HPDcache MSHR implements write byte enable + `ifndef CONF_HPDCACHE_MSHR_RAM_WBYTEENABLE + `define CONF_HPDCACHE_MSHR_RAM_WBYTEENABLE 1'b0 + `endif + localparam bit PARAM_MSHR_RAM_WBYTEENABLE = `CONF_HPDCACHE_MSHR_RAM_WBYTEENABLE; + + `ifndef CONF_HPDCACHE_MSHR_USE_REGBANK + `define CONF_HPDCACHE_MSHR_USE_REGBANK 0 + `endif + localparam bit PARAM_MSHR_USE_REGBANK = `CONF_HPDCACHE_MSHR_USE_REGBANK; + // }}} + + // Definition of constants and types for the Write Buffer (WBUF) + // {{{ + `ifndef CONF_HPDCACHE_WBUF_DATA_ENTRIES + `define __WBUF_DATA_ENTRIES_DESIRED (CVA6ConfigWtDcacheWbufDepth/2) + `define __WBUF_DATA_ENTRIES_MAX (CVA6ConfigWtDcacheWbufDepth) + `define CONF_HPDCACHE_WBUF_DATA_ENTRIES \ + ((`__WBUF_DATA_ENTRIES_DESIRED) < 1 ? 1 : \ + ((`__WBUF_DATA_ENTRIES_DESIRED) < (`__WBUF_DATA_ENTRIES_MAX) ? 
\ + (`__WBUF_DATA_ENTRIES_DESIRED) : (`__WBUF_DATA_ENTRIES_MAX))) + `endif + localparam int unsigned PARAM_WBUF_DIR_ENTRIES = CVA6ConfigWtDcacheWbufDepth; + localparam int unsigned PARAM_WBUF_DATA_ENTRIES = `CONF_HPDCACHE_WBUF_DATA_ENTRIES; + + `ifndef CONF_HPDCACHE_WBUF_WORDS + `define CONF_HPDCACHE_WBUF_WORDS PARAM_REQ_WORDS + `endif + localparam int unsigned PARAM_WBUF_WORDS = `CONF_HPDCACHE_WBUF_WORDS; + + `ifndef CONF_HPDCACHE_WBUF_TIMECNT_WIDTH + `define CONF_HPDCACHE_WBUF_TIMECNT_WIDTH 4 + `endif + localparam int unsigned PARAM_WBUF_TIMECNT_WIDTH = `CONF_HPDCACHE_WBUF_TIMECNT_WIDTH; + // }}} + + // Definition of constants and types for the Replay Table (RTAB) + // {{{ + `ifndef CONF_HPDCACHE_RTAB_ENTRIES + `define CONF_HPDCACHE_RTAB_ENTRIES 4 + `endif + localparam int PARAM_RTAB_ENTRIES = `CONF_HPDCACHE_RTAB_ENTRIES; + // }}} + +endpackage diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_subsystem.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_subsystem.sv new file mode 100644 index 00000000000..68993958f7d --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_subsystem.sv @@ -0,0 +1,601 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : June, 2022 + * Description : CVA6 cache subsystem integrating standard CVA6's + * instruction cache and the Core-V High-Performance L1 +* data cache (CV-HPDcache). 
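+ *                  The CVA6 load, store/AMO, CMO and hardware-prefetcher
+ *                  requesters reach the HPDcache through dedicated adapters,
+ *                  and an AXI arbiter multiplexes the I$ and D$ memory
+ *                  traffic onto a single AXI master port.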
+ * History : + */ +module cva6_hpdcache_subsystem +// Parameters +// {{{ +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter int NumPorts = 4, + parameter int NrHwPrefetchers = 4, + parameter type noc_req_t = logic, + parameter type noc_resp_t = logic, + parameter type cmo_req_t = logic, + parameter type cmo_rsp_t = logic +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // I$ + // {{{ + input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) + input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together + output logic icache_miss_o, // to performance counter + // address translation requests + input ariane_pkg::icache_areq_t icache_areq_i, // to/from frontend + output ariane_pkg::icache_arsp_t icache_areq_o, + // data requests + input ariane_pkg::icache_dreq_t icache_dreq_i, // to/from frontend + output ariane_pkg::icache_drsp_t icache_dreq_o, + // }}} + + // D$ + // {{{ + // Cache management + input logic dcache_enable_i, // from CSR + input logic dcache_flush_i, // high until acknowledged + output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic dcache_miss_o, // we missed on a ld/st + + // AMO interface + input ariane_pkg::amo_req_t dcache_amo_req_i, // from LSU + output ariane_pkg::amo_resp_t dcache_amo_resp_o, // to LSU + // CMO interface + input cmo_req_t dcache_cmo_req_i, // from CMO FU + output cmo_rsp_t dcache_cmo_resp_o, // to CMO FU + // Request ports + input ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU + output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU + // Write Buffer status + output logic wbuffer_empty_o, + output logic wbuffer_not_ni_o, + + // Hardware memory prefetcher configuration + input logic [NrHwPrefetchers-1:0] hwpf_base_set_i, + input logic [NrHwPrefetchers-1:0][63:0] hwpf_base_i, + output logic [NrHwPrefetchers-1:0][63:0] hwpf_base_o, + input logic [NrHwPrefetchers-1:0] hwpf_param_set_i, + input logic [NrHwPrefetchers-1:0][63:0] hwpf_param_i, + output logic [NrHwPrefetchers-1:0][63:0] hwpf_param_o, + input logic [NrHwPrefetchers-1:0] hwpf_throttle_set_i, + input logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_i, + output logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_o, + output logic [63:0] hwpf_status_o, + // }}} + + // AXI port to upstream memory/peripherals + // {{{ + output noc_req_t noc_req_o, + input noc_resp_t noc_resp_i + // }}} +); +// }}} + + `include "axi/typedef.svh" + + // I$ instantiation + // {{{ + logic icache_miss_valid, icache_miss_ready; + wt_cache_pkg::icache_req_t icache_miss; + + logic icache_miss_resp_valid; + wt_cache_pkg::icache_rtrn_t icache_miss_resp; + + localparam int ICACHE_RDTXID = 1 << (ariane_pkg::MEM_TID_WIDTH - 1); + + cva6_icache #( + .CVA6Cfg (CVA6Cfg), + .RdTxId (ICACHE_RDTXID) + ) i_cva6_icache ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i (icache_flush_i), + .en_i (icache_en_i), + .miss_o (icache_miss_o), + .areq_i (icache_areq_i), + .areq_o (icache_areq_o), + .dreq_i (icache_dreq_i), + .dreq_o (icache_dreq_o), + .mem_rtrn_vld_i (icache_miss_resp_valid), + .mem_rtrn_i (icache_miss_resp), + .mem_data_req_o (icache_miss_valid), + .mem_data_ack_i (icache_miss_ready), + .mem_data_o (icache_miss) + ); + // }}} + + // D$ instantiation + // {{{ + `include "hpdcache_typedef.svh" + + // 0: Page-Table Walk (PTW) + // 1: Load unit + // 2: Accelerator load + // 3: Store/AMO + // . + // . + // . 
+ // NumPorts: CMO + // NumPorts + 1: Hardware Memory Prefetcher (hwpf) + localparam int HPDCACHE_NREQUESTERS = NumPorts + 2; + + typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t; + typedef logic [ariane_pkg::MEM_TID_WIDTH-1:0] hpdcache_mem_id_t; + typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t; + typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t; + `HPDCACHE_TYPEDEF_MEM_REQ_T(hpdcache_mem_req_t, hpdcache_mem_addr_t, hpdcache_mem_id_t); + `HPDCACHE_TYPEDEF_MEM_RESP_R_T(hpdcache_mem_resp_r_t, hpdcache_mem_id_t, hpdcache_mem_data_t); + `HPDCACHE_TYPEDEF_MEM_REQ_W_T(hpdcache_mem_req_w_t, hpdcache_mem_data_t, hpdcache_mem_be_t); + `HPDCACHE_TYPEDEF_MEM_RESP_W_T(hpdcache_mem_resp_w_t, hpdcache_mem_id_t); + + typedef logic [63:0] hwpf_stride_param_t; + + logic dcache_req_valid [HPDCACHE_NREQUESTERS-1:0]; + logic dcache_req_ready [HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_req_t dcache_req [HPDCACHE_NREQUESTERS-1:0]; + logic dcache_req_abort [HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_tag_t dcache_req_tag [HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_pma_t dcache_req_pma [HPDCACHE_NREQUESTERS-1:0]; + logic dcache_rsp_valid [HPDCACHE_NREQUESTERS-1:0]; + hpdcache_pkg::hpdcache_rsp_t dcache_rsp [HPDCACHE_NREQUESTERS-1:0]; + logic dcache_read_miss, dcache_write_miss; + + logic [2:0] snoop_valid; + logic [2:0] snoop_abort; + hpdcache_pkg::hpdcache_req_offset_t [2:0] snoop_addr_offset; + hpdcache_pkg::hpdcache_tag_t [2:0] snoop_addr_tag; + logic [2:0] snoop_phys_indexed; + + logic dcache_cmo_req_is_prefetch; + + logic dcache_miss_ready; + logic dcache_miss_valid; + hpdcache_mem_req_t dcache_miss; + + logic dcache_miss_resp_ready; + logic dcache_miss_resp_valid; + hpdcache_mem_resp_r_t dcache_miss_resp; + + logic dcache_wbuf_ready; + logic dcache_wbuf_valid; + hpdcache_mem_req_t dcache_wbuf; + + logic dcache_wbuf_data_ready; + logic dcache_wbuf_data_valid; + hpdcache_mem_req_w_t dcache_wbuf_data; + + logic dcache_wbuf_resp_ready; + logic dcache_wbuf_resp_valid; + hpdcache_mem_resp_w_t dcache_wbuf_resp; + + logic dcache_uc_read_ready; + logic dcache_uc_read_valid; + hpdcache_mem_req_t dcache_uc_read; + + logic dcache_uc_read_resp_ready; + logic dcache_uc_read_resp_valid; + hpdcache_mem_resp_r_t dcache_uc_read_resp; + + logic dcache_uc_write_ready; + logic dcache_uc_write_valid; + hpdcache_mem_req_t dcache_uc_write; + + logic dcache_uc_write_data_ready; + logic dcache_uc_write_data_valid; + hpdcache_mem_req_w_t dcache_uc_write_data; + + logic dcache_uc_write_resp_ready; + logic dcache_uc_write_resp_valid; + hpdcache_mem_resp_w_t dcache_uc_write_resp; + + hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_in; + hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_out; + + generate + ariane_pkg::dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0]; + + for (genvar r = 0; r < (NumPorts-1); r++) begin : cva6_hpdcache_load_if_adapter_gen + assign dcache_req_ports[r] = dcache_req_ports_i[r]; + + cva6_hpdcache_if_adapter #( + .CVA6Cfg (CVA6Cfg), + .is_load_port (1'b1) + ) i_cva6_hpdcache_load_if_adapter ( + .clk_i, + .rst_ni, + + .hpdcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(r)), + + .cva6_req_i (dcache_req_ports[r]), + .cva6_req_o (dcache_req_ports_o[r]), + .cva6_amo_req_i ('0), + .cva6_amo_resp_o (/* unused */), + + .hpdcache_req_valid_o (dcache_req_valid[r]), + .hpdcache_req_ready_i (dcache_req_ready[r]), + .hpdcache_req_o (dcache_req[r]), + .hpdcache_req_abort_o (dcache_req_abort[r]), + 
.hpdcache_req_tag_o (dcache_req_tag[r]), + .hpdcache_req_pma_o (dcache_req_pma[r]), + + .hpdcache_rsp_valid_i (dcache_rsp_valid[r]), + .hpdcache_rsp_i (dcache_rsp[r]) + ); + end + + cva6_hpdcache_if_adapter #( + .CVA6Cfg (CVA6Cfg), + .is_load_port (1'b0) + ) i_cva6_hpdcache_store_if_adapter ( + .clk_i, + .rst_ni, + + .hpdcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(NumPorts-1)), + + .cva6_req_i (dcache_req_ports_i[NumPorts-1]), + .cva6_req_o (dcache_req_ports_o[NumPorts-1]), + .cva6_amo_req_i (dcache_amo_req_i), + .cva6_amo_resp_o (dcache_amo_resp_o), + + .hpdcache_req_valid_o (dcache_req_valid[NumPorts-1]), + .hpdcache_req_ready_i (dcache_req_ready[NumPorts-1]), + .hpdcache_req_o (dcache_req[NumPorts-1]), + .hpdcache_req_abort_o (dcache_req_abort[NumPorts-1]), + .hpdcache_req_tag_o (dcache_req_tag[NumPorts-1]), + .hpdcache_req_pma_o (dcache_req_pma[NumPorts-1]), + + .hpdcache_rsp_valid_i (dcache_rsp_valid[NumPorts-1]), + .hpdcache_rsp_i (dcache_rsp[NumPorts-1]) + ); + +`ifdef HPDCACHE_ENABLE_CMO + cva6_hpdcache_cmo_if_adapter #( + .cmo_req_t (cmo_req_t), + .cmo_rsp_t (cmo_rsp_t) + ) i_cva6_hpdcache_cmo_if_adapter ( + .clk_i, + .rst_ni, + + .dcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(NumPorts)), + + .cva6_cmo_req_i (dcache_cmo_req_i), + .cva6_cmo_resp_o (dcache_cmo_resp_o), + + .dcache_req_valid_o (dcache_req_valid[NumPorts]), + .dcache_req_ready_i (dcache_req_ready[NumPorts]), + .dcache_req_o (dcache_req[NumPorts]), + .dcache_req_abort_o (dcache_req_abort[NumPorts]), + .dcache_req_tag_o (dcache_req_tag[NumPorts]), + .dcache_req_pma_o (dcache_req_pma[NumPorts]), + + .dcache_rsp_valid_i (dcache_rsp_valid[NumPorts]), + .dcache_rsp_i (dcache_rsp[NumPorts]) + ); +`else + assign dcache_req_valid[NumPorts] = 1'b0, + dcache_req [NumPorts] = '0, + dcache_req_abort[NumPorts] = 1'b0, + dcache_req_tag [NumPorts] = '0, + dcache_req_pma [NumPorts] = '0; +`endif + endgenerate + + // Snoop load port + assign snoop_valid[0] = dcache_req_valid[1] & dcache_req_ready[1], + snoop_abort[0] = dcache_req_abort[1], + snoop_addr_offset[0] = dcache_req[1].addr_offset, + snoop_addr_tag[0] = dcache_req_tag[1], + snoop_phys_indexed[0] = dcache_req[1].phys_indexed; + + // Snoop Store/AMO port + assign snoop_valid[1] = dcache_req_valid[NumPorts-1] & dcache_req_ready[NumPorts-1], + snoop_abort[1] = dcache_req_abort[NumPorts-1], + snoop_addr_offset[1] = dcache_req[NumPorts-1].addr_offset, + snoop_addr_tag[1] = dcache_req_tag[NumPorts-1], + snoop_phys_indexed[1] = dcache_req[NumPorts-1].phys_indexed; + +`ifdef HPDCACHE_ENABLE_CMO + // Snoop CMO port (in case of read prefetch accesses) + assign dcache_cmo_req_is_prefetch = + hpdcache_pkg::is_cmo_prefetch(dcache_req[NumPorts].op, dcache_req[NumPorts].size); + assign snoop_valid[2] = dcache_req_valid[NumPorts] + & dcache_req_ready[NumPorts] + & dcache_cmo_req_is_prefetch, + snoop_abort[2] = dcache_req_abort[NumPorts], + snoop_addr_offset[2] = dcache_req[NumPorts].addr_offset, + snoop_addr_tag[2] = dcache_req_tag[NumPorts], + snoop_phys_indexed[2] = dcache_req[NumPorts].phys_indexed; +`else + assign snoop_valid[2] = 1'b0, + snoop_abort[2] = 1'b0, + snoop_addr_offset[2] = '0, + snoop_addr_tag[2] = '0, + snoop_phys_indexed[2] = 1'b0; +`endif + + generate + for (genvar h = 0; h < NrHwPrefetchers; h++) begin : hwpf_throttle_gen + assign hwpf_throttle_in[h] = hwpf_stride_pkg::hwpf_stride_throttle_t'(hwpf_throttle_i[h]), + hwpf_throttle_o[h] = hwpf_stride_pkg::hwpf_stride_param_t'(hwpf_throttle_out[h]); + end + endgenerate + + hwpf_stride_wrapper #( + .NUM_HW_PREFETCH 
(NrHwPrefetchers), + .NUM_SNOOP_PORTS (3) + ) i_hwpf_stride_wrapper ( + .clk_i, + .rst_ni, + + .hwpf_stride_base_set_i (hwpf_base_set_i), + .hwpf_stride_base_i (hwpf_base_i), + .hwpf_stride_base_o (hwpf_base_o), + .hwpf_stride_param_set_i (hwpf_param_set_i), + .hwpf_stride_param_i (hwpf_param_i), + .hwpf_stride_param_o (hwpf_param_o), + .hwpf_stride_throttle_set_i (hwpf_throttle_set_i), + .hwpf_stride_throttle_i (hwpf_throttle_in), + .hwpf_stride_throttle_o (hwpf_throttle_out), + .hwpf_stride_status_o (hwpf_status_o), + + .snoop_valid_i (snoop_valid), + .snoop_abort_i (snoop_abort), + .snoop_addr_offset_i (snoop_addr_offset), + .snoop_addr_tag_i (snoop_addr_tag), + .snoop_phys_indexed_i (snoop_phys_indexed), + + .hpdcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(NumPorts+1)), + + .hpdcache_req_valid_o (dcache_req_valid[NumPorts+1]), + .hpdcache_req_ready_i (dcache_req_ready[NumPorts+1]), + .hpdcache_req_o (dcache_req[NumPorts+1]), + .hpdcache_req_abort_o (dcache_req_abort[NumPorts+1]), + .hpdcache_req_tag_o (dcache_req_tag[NumPorts+1]), + .hpdcache_req_pma_o (dcache_req_pma[NumPorts+1]), + .hpdcache_rsp_valid_i (dcache_rsp_valid[NumPorts+1]), + .hpdcache_rsp_i (dcache_rsp[NumPorts+1]) + ); + + hpdcache #( + .NREQUESTERS (HPDCACHE_NREQUESTERS), + .HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH), + .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t), + .hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t) + ) i_hpdcache( + .clk_i, + .rst_ni, + + .wbuf_flush_i (dcache_flush_i), + + .core_req_valid_i (dcache_req_valid), + .core_req_ready_o (dcache_req_ready), + .core_req_i (dcache_req), + .core_req_abort_i (dcache_req_abort), + .core_req_tag_i (dcache_req_tag), + .core_req_pma_i (dcache_req_pma), + + .core_rsp_valid_o (dcache_rsp_valid), + .core_rsp_o (dcache_rsp), + + .mem_req_miss_read_ready_i (dcache_miss_ready), + .mem_req_miss_read_valid_o (dcache_miss_valid), + .mem_req_miss_read_o (dcache_miss), + + .mem_resp_miss_read_ready_o (dcache_miss_resp_ready), + .mem_resp_miss_read_valid_i (dcache_miss_resp_valid), + .mem_resp_miss_read_i (dcache_miss_resp), + + .mem_req_wbuf_write_ready_i (dcache_wbuf_ready), + .mem_req_wbuf_write_valid_o (dcache_wbuf_valid), + .mem_req_wbuf_write_o (dcache_wbuf), + + .mem_req_wbuf_write_data_ready_i (dcache_wbuf_data_ready), + .mem_req_wbuf_write_data_valid_o (dcache_wbuf_data_valid), + .mem_req_wbuf_write_data_o (dcache_wbuf_data), + + .mem_resp_wbuf_write_ready_o (dcache_wbuf_resp_ready), + .mem_resp_wbuf_write_valid_i (dcache_wbuf_resp_valid), + .mem_resp_wbuf_write_i (dcache_wbuf_resp), + + .mem_req_uc_read_ready_i (dcache_uc_read_ready), + .mem_req_uc_read_valid_o (dcache_uc_read_valid), + .mem_req_uc_read_o (dcache_uc_read), + + .mem_resp_uc_read_ready_o (dcache_uc_read_resp_ready), + .mem_resp_uc_read_valid_i (dcache_uc_read_resp_valid), + .mem_resp_uc_read_i (dcache_uc_read_resp), + + .mem_req_uc_write_ready_i (dcache_uc_write_ready), + .mem_req_uc_write_valid_o (dcache_uc_write_valid), + .mem_req_uc_write_o (dcache_uc_write), + + .mem_req_uc_write_data_ready_i (dcache_uc_write_data_ready), + .mem_req_uc_write_data_valid_o (dcache_uc_write_data_valid), + .mem_req_uc_write_data_o (dcache_uc_write_data), + + .mem_resp_uc_write_ready_o (dcache_uc_write_resp_ready), + .mem_resp_uc_write_valid_i (dcache_uc_write_resp_valid), + .mem_resp_uc_write_i (dcache_uc_write_resp), + + .evt_cache_write_miss_o (dcache_write_miss), + 
.evt_cache_read_miss_o (dcache_read_miss), + .evt_uncached_req_o (/* unused */), + .evt_cmo_req_o (/* unused */), + .evt_write_req_o (/* unused */), + .evt_read_req_o (/* unused */), + .evt_prefetch_req_o (/* unused */), + .evt_req_on_hold_o (/* unused */), + .evt_rtab_rollback_o (/* unused */), + .evt_stall_refill_o (/* unused */), + .evt_stall_o (/* unused */), + + .wbuf_empty_o (wbuffer_empty_o), + + .cfg_enable_i (dcache_enable_i), + .cfg_wbuf_threshold_i (4'd2), + .cfg_wbuf_reset_timecnt_on_write_i (1'b1), + .cfg_wbuf_sequential_waw_i (1'b0), + .cfg_wbuf_inhibit_write_coalescing_i (1'b0), + .cfg_prefetch_updt_plru_i (1'b1), + .cfg_error_on_cacheable_amo_i (1'b0), + .cfg_rtab_single_entry_i (1'b0) + ); + + assign dcache_miss_o = dcache_read_miss, + wbuffer_not_ni_o = wbuffer_empty_o; + + always_ff @(posedge clk_i or negedge rst_ni) + begin : dcache_flush_ff + if (!rst_ni) dcache_flush_ack_o <= 1'b0; + else dcache_flush_ack_o <= ~dcache_flush_ack_o & dcache_flush_i; + end + + // }}} + + // AXI arbiter instantiation + // {{{ + typedef logic [CVA6Cfg.AxiAddrWidth-1:0] axi_addr_t; + typedef logic [CVA6Cfg.AxiDataWidth-1:0] axi_data_t; + typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] axi_strb_t; + typedef logic [CVA6Cfg.AxiIdWidth-1:0] axi_id_t; + typedef logic [CVA6Cfg.AxiUserWidth-1:0] axi_user_t; + `AXI_TYPEDEF_AW_CHAN_T(axi_aw_chan_t, axi_addr_t, axi_id_t, axi_user_t) + `AXI_TYPEDEF_W_CHAN_T(axi_w_chan_t, axi_data_t, axi_strb_t, axi_user_t) + `AXI_TYPEDEF_B_CHAN_T(axi_b_chan_t, axi_id_t, axi_user_t) + `AXI_TYPEDEF_AR_CHAN_T(axi_ar_chan_t, axi_addr_t, axi_id_t, axi_user_t) + `AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, axi_data_t, axi_id_t, axi_user_t) + + cva6_hpdcache_subsystem_axi_arbiter #( + .HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH), + .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t), + .hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t), + + .AxiAddrWidth (CVA6Cfg.AxiAddrWidth), + .AxiDataWidth (CVA6Cfg.AxiDataWidth), + .AxiIdWidth (CVA6Cfg.AxiIdWidth), + .AxiUserWidth (CVA6Cfg.AxiUserWidth), + .axi_ar_chan_t (axi_ar_chan_t), + .axi_aw_chan_t (axi_aw_chan_t), + .axi_w_chan_t (axi_w_chan_t), + .axi_req_t (noc_req_t), + .axi_rsp_t (noc_resp_t) + ) i_axi_arbiter ( + .clk_i, + .rst_ni, + + .icache_miss_valid_i (icache_miss_valid), + .icache_miss_ready_o (icache_miss_ready), + .icache_miss_i (icache_miss), + .icache_miss_id_i (hpdcache_mem_id_t'(ICACHE_RDTXID)), + + .icache_miss_resp_valid_o (icache_miss_resp_valid), + .icache_miss_resp_o (icache_miss_resp), + + .dcache_miss_ready_o (dcache_miss_ready), + .dcache_miss_valid_i (dcache_miss_valid), + .dcache_miss_i (dcache_miss), + + .dcache_miss_resp_ready_i (dcache_miss_resp_ready), + .dcache_miss_resp_valid_o (dcache_miss_resp_valid), + .dcache_miss_resp_o (dcache_miss_resp), + + .dcache_wbuf_ready_o (dcache_wbuf_ready), + .dcache_wbuf_valid_i (dcache_wbuf_valid), + .dcache_wbuf_i (dcache_wbuf), + + .dcache_wbuf_data_ready_o (dcache_wbuf_data_ready), + .dcache_wbuf_data_valid_i (dcache_wbuf_data_valid), + .dcache_wbuf_data_i (dcache_wbuf_data), + + .dcache_wbuf_resp_ready_i (dcache_wbuf_resp_ready), + .dcache_wbuf_resp_valid_o (dcache_wbuf_resp_valid), + .dcache_wbuf_resp_o (dcache_wbuf_resp), + + .dcache_uc_read_ready_o (dcache_uc_read_ready), + .dcache_uc_read_valid_i (dcache_uc_read_valid), + .dcache_uc_read_i (dcache_uc_read), + .dcache_uc_read_id_i ('1), + + .dcache_uc_read_resp_ready_i 
(dcache_uc_read_resp_ready), + .dcache_uc_read_resp_valid_o (dcache_uc_read_resp_valid), + .dcache_uc_read_resp_o (dcache_uc_read_resp), + + .dcache_uc_write_ready_o (dcache_uc_write_ready), + .dcache_uc_write_valid_i (dcache_uc_write_valid), + .dcache_uc_write_i (dcache_uc_write), + .dcache_uc_write_id_i ('1), + + .dcache_uc_write_data_ready_o (dcache_uc_write_data_ready), + .dcache_uc_write_data_valid_i (dcache_uc_write_data_valid), + .dcache_uc_write_data_i (dcache_uc_write_data), + + .dcache_uc_write_resp_ready_i (dcache_uc_write_resp_ready), + .dcache_uc_write_resp_valid_o (dcache_uc_write_resp_valid), + .dcache_uc_write_resp_o (dcache_uc_write_resp), + + .axi_req_o (noc_req_o), + .axi_resp_i (noc_resp_i) + ); + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial assert (hpdcache_pkg::HPDCACHE_REQ_SRC_ID_WIDTH >= $clog2(HPDCACHE_NREQUESTERS)) + else $fatal(1, "HPDCACHE_REQ_SRC_ID_WIDTH is not wide enough"); + + a_invalid_instruction_fetch: assert property ( + @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX) + else $warning(1,"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X", + icache_dreq_o.vaddr, icache_dreq_o.data); + + a_invalid_write_data: assert property ( + @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX) + else $warning(1,"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X", + {dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index}, dcache_req_ports_i[2].data_be, dcache_req_ports_i[2].data_wdata); + + for (genvar j=0; j<2; j++) begin : gen_assertion + a_invalid_read_data: assert property ( + @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX) + else $warning(1,"[l1 dcache] reading invalid data on port %01d: data=%016X", + j, dcache_req_ports_o[j].data_rdata); + end + // pragma translate_on + // }}} + +endmodule : cva6_hpdcache_subsystem diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_subsystem_axi_arbiter.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_subsystem_axi_arbiter.sv new file mode 100644 index 00000000000..ae3dd04edb3 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/target/cva6/cva6_hpdcache_subsystem_axi_arbiter.sv @@ -0,0 +1,593 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date: June, 2022 + * Description : AXI arbiter for the CVA6 cache subsystem integrating standard + * CVA6's instruction cache and the Core-V High-Performance +* L1 Dcache (CV-HPDcache). 
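+ *                 Cacheable read misses from the I$ and the D$, as well as
+ *                 uncacheable D$ reads, are arbitrated onto the single AXI
+ *                 read address channel; write-buffer writes and uncacheable
+ *                 D$ writes share the AXI write channels. Responses are
+ *                 routed back to their requester through a routing table
+ *                 indexed by the memory transaction ID.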
+ * History : + */ +module cva6_hpdcache_subsystem_axi_arbiter +// Parameters +// {{{ +#( + parameter int HPDcacheMemIdWidth = 8, + parameter int HPDcacheMemDataWidth = 512, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_r_t = logic, + parameter type hpdcache_mem_resp_w_t = logic, + + parameter int unsigned AxiAddrWidth = 1, + parameter int unsigned AxiDataWidth = 1, + parameter int unsigned AxiIdWidth = 1, + parameter int unsigned AxiUserWidth = 1, + parameter type axi_ar_chan_t = logic, + parameter type axi_aw_chan_t = logic, + parameter type axi_w_chan_t = logic, + parameter type axi_req_t = logic, + parameter type axi_rsp_t = logic, + + localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0] +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + // Interfaces from/to I$ + // {{{ + input logic icache_miss_valid_i, + output logic icache_miss_ready_o, + input wt_cache_pkg::icache_req_t icache_miss_i, + input hpdcache_mem_id_t icache_miss_id_i, + + output logic icache_miss_resp_valid_o, + output wt_cache_pkg::icache_rtrn_t icache_miss_resp_o, + // }}} + + // Interfaces from/to D$ + // {{{ + output logic dcache_miss_ready_o, + input logic dcache_miss_valid_i, + input hpdcache_mem_req_t dcache_miss_i, + + input logic dcache_miss_resp_ready_i, + output logic dcache_miss_resp_valid_o, + output hpdcache_mem_resp_r_t dcache_miss_resp_o, + + // Write-buffer write interface + output logic dcache_wbuf_ready_o, + input logic dcache_wbuf_valid_i, + input hpdcache_mem_req_t dcache_wbuf_i, + + output logic dcache_wbuf_data_ready_o, + input logic dcache_wbuf_data_valid_i, + input hpdcache_mem_req_w_t dcache_wbuf_data_i, + + input logic dcache_wbuf_resp_ready_i, + output logic dcache_wbuf_resp_valid_o, + output hpdcache_mem_resp_w_t dcache_wbuf_resp_o, + + // Uncached read interface + output logic dcache_uc_read_ready_o, + input logic dcache_uc_read_valid_i, + input hpdcache_mem_req_t dcache_uc_read_i, + input hpdcache_mem_id_t dcache_uc_read_id_i, + + input logic dcache_uc_read_resp_ready_i, + output logic dcache_uc_read_resp_valid_o, + output hpdcache_mem_resp_r_t dcache_uc_read_resp_o, + + // Uncached write interface + output logic dcache_uc_write_ready_o, + input logic dcache_uc_write_valid_i, + input hpdcache_mem_req_t dcache_uc_write_i, + input hpdcache_mem_id_t dcache_uc_write_id_i, + + output logic dcache_uc_write_data_ready_o, + input logic dcache_uc_write_data_valid_i, + input hpdcache_mem_req_w_t dcache_uc_write_data_i, + + input logic dcache_uc_write_resp_ready_i, + output logic dcache_uc_write_resp_valid_o, + output hpdcache_mem_resp_w_t dcache_uc_write_resp_o, + // }}} + + // AXI port to upstream memory/peripherals + // {{{ + output axi_req_t axi_req_o, + input axi_rsp_t axi_resp_i + // }}} +); +// }}} + + // Internal type definitions + // {{{ + typedef struct packed { + logic [AxiIdWidth-1:0] id; + logic [AxiDataWidth-1:0] data; + axi_pkg::resp_t resp; + logic last; + logic [AxiUserWidth-1:0] user; + } axi_r_chan_t; + + typedef struct packed { + logic [AxiIdWidth-1:0] id; + axi_pkg::resp_t resp; + logic [AxiUserWidth-1:0] user; + } axi_b_chan_t; + + localparam int MEM_RESP_RT_DEPTH = (1 << HPDcacheMemIdWidth); + typedef hpdcache_mem_id_t [MEM_RESP_RT_DEPTH-1:0] mem_resp_rt_t; + typedef logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] icache_resp_data_t; + // }}} + + // Adapt the I$ interface to the HPDcache memory interface + // {{{ + localparam int ICACHE_CL_WORDS = 
ariane_pkg::ICACHE_LINE_WIDTH/64; + localparam int ICACHE_CL_WORD_INDEX = $clog2(ICACHE_CL_WORDS); + localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH/8); + localparam int ICACHE_WORD_SIZE = 3; + localparam int ICACHE_MEM_REQ_CL_LEN = + (ariane_pkg::ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1)/HPDcacheMemDataWidth; + localparam int ICACHE_MEM_REQ_CL_SIZE = + (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) ? + $clog2(HPDcacheMemDataWidth/8) : ICACHE_CL_SIZE; + + // I$ request + hpdcache_mem_req_t icache_miss_req_wdata; + logic icache_miss_req_w, icache_miss_req_wok; + + hpdcache_mem_req_t icache_miss_req_rdata; + logic icache_miss_req_r, icache_miss_req_rok; + + logic icache_miss_pending_q; + + // This FIFO has two functionnalities: + // - Stabilize the ready-valid protocol. The ICACHE can abort a valid + // transaction without receiving the corresponding ready signal. This + // behavior is not supported by AXI. + // - Cut a possible long timing path. + hpdcache_fifo_reg #( + .FIFO_DEPTH (1), + .fifo_data_t (hpdcache_mem_req_t) + ) i_icache_miss_req_fifo( + .clk_i, + .rst_ni, + + .w_i (icache_miss_req_w), + .wok_o (icache_miss_req_wok), + .wdata_i (icache_miss_req_wdata), + + .r_i (icache_miss_req_r), + .rok_o (icache_miss_req_rok), + .rdata_o (icache_miss_req_rdata) + ); + + assign icache_miss_req_w = icache_miss_valid_i, + icache_miss_ready_o = icache_miss_req_wok; + + assign icache_miss_req_wdata.mem_req_addr = icache_miss_i.paddr, + icache_miss_req_wdata.mem_req_len = icache_miss_i.nc ? 0 : ICACHE_MEM_REQ_CL_LEN - 1, + icache_miss_req_wdata.mem_req_size = icache_miss_i.nc ? ICACHE_WORD_SIZE : ICACHE_MEM_REQ_CL_SIZE, + icache_miss_req_wdata.mem_req_id = icache_miss_i.tid, + icache_miss_req_wdata.mem_req_command = hpdcache_pkg::HPDCACHE_MEM_READ, + icache_miss_req_wdata.mem_req_atomic = hpdcache_pkg::hpdcache_mem_atomic_e'(0), + icache_miss_req_wdata.mem_req_cacheable = ~icache_miss_i.nc; + + + // I$ response + logic icache_miss_resp_w, icache_miss_resp_wok; + hpdcache_mem_resp_r_t icache_miss_resp_wdata; + + logic icache_miss_resp_data_w, icache_miss_resp_data_wok; + logic icache_miss_resp_data_r, icache_miss_resp_data_rok; + icache_resp_data_t icache_miss_resp_data_rdata; + + logic icache_miss_resp_meta_w, icache_miss_resp_meta_wok; + logic icache_miss_resp_meta_r, icache_miss_resp_meta_rok; + hpdcache_mem_id_t icache_miss_resp_meta_id; + + icache_resp_data_t icache_miss_rdata; + + generate + if (HPDcacheMemDataWidth < ariane_pkg::ICACHE_LINE_WIDTH) begin + hpdcache_fifo_reg #( + .FIFO_DEPTH (1), + .fifo_data_t (hpdcache_mem_id_t) + ) i_icache_refill_meta_fifo ( + .clk_i, + .rst_ni, + + .w_i (icache_miss_resp_meta_w), + .wok_o (icache_miss_resp_meta_wok), + .wdata_i (icache_miss_resp_wdata.mem_resp_r_id), + + .r_i (icache_miss_resp_meta_r), + .rok_o (icache_miss_resp_meta_rok), + .rdata_o (icache_miss_resp_meta_id) + ); + + hpdcache_data_upsize #( + .WR_WIDTH (HPDcacheMemDataWidth), + .RD_WIDTH (ariane_pkg::ICACHE_LINE_WIDTH), + .DEPTH (1) + ) i_icache_hpdcache_data_upsize ( + .clk_i, + .rst_ni, + + .w_i (icache_miss_resp_data_w), + .wlast_i (icache_miss_resp_wdata.mem_resp_r_last), + .wok_o (icache_miss_resp_data_wok), + .wdata_i (icache_miss_resp_wdata.mem_resp_r_data), + + .r_i (icache_miss_resp_data_r), + .rok_o (icache_miss_resp_data_rok), + .rdata_o (icache_miss_resp_data_rdata) + ); + + assign icache_miss_resp_meta_r = 1'b1, + icache_miss_resp_data_r = 1'b1; + + assign icache_miss_resp_meta_w = icache_miss_resp_w & + 
icache_miss_resp_wdata.mem_resp_r_last; + + assign icache_miss_resp_data_w = icache_miss_resp_w; + + assign icache_miss_resp_wok = icache_miss_resp_data_wok & ( + icache_miss_resp_meta_wok | ~icache_miss_resp_wdata.mem_resp_r_last); + + assign icache_miss_rdata = icache_miss_resp_data_rdata; + + end else begin + assign icache_miss_resp_data_rok = icache_miss_resp_w; + assign icache_miss_resp_meta_rok = icache_miss_resp_w; + assign icache_miss_resp_wok = 1'b1; + assign icache_miss_resp_meta_id = icache_miss_resp_wdata.mem_resp_r_id; + assign icache_miss_resp_data_rdata = icache_miss_resp_wdata.mem_resp_r_data; + + // In the case of uncacheable accesses, the Icache expects the data to be right-aligned + always_comb + begin : icache_miss_resp_data_comb + if (!icache_miss_req_rdata.mem_req_cacheable) begin + automatic logic [ICACHE_CL_WORD_INDEX - 1: 0] icache_miss_word_index; + automatic logic [63:0] icache_miss_word; + icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3 +: ICACHE_CL_WORD_INDEX]; + icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64 +: 64]; + icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH-64{1'b0}}, icache_miss_word}; + end else begin + icache_miss_rdata = icache_miss_resp_data_rdata; + end + end + end + endgenerate + + assign icache_miss_resp_valid_o = icache_miss_resp_meta_rok, + icache_miss_resp_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK, + icache_miss_resp_o.user = '0, + icache_miss_resp_o.inv = '0, + icache_miss_resp_o.tid = icache_miss_resp_meta_id, + icache_miss_resp_o.data = icache_miss_rdata; + + // consume the Icache miss on the arrival of the response. The request + // metadata is decoded to forward the correct word in case of uncacheable + // Icache access + assign icache_miss_req_r = icache_miss_resp_meta_rok; + // }}} + + // Read request arbiter + // {{{ + logic mem_req_read_ready [2:0]; + logic mem_req_read_valid [2:0]; + hpdcache_mem_req_t mem_req_read [2:0]; + + logic mem_req_read_ready_arb; + logic mem_req_read_valid_arb; + hpdcache_mem_req_t mem_req_read_arb; + + assign mem_req_read_valid[0] = icache_miss_req_rok & ~icache_miss_pending_q, + mem_req_read[0] = icache_miss_req_rdata; + + assign dcache_miss_ready_o = mem_req_read_ready[1], + mem_req_read_valid[1] = dcache_miss_valid_i, + mem_req_read[1] = dcache_miss_i; + + assign dcache_uc_read_ready_o = mem_req_read_ready[2], + mem_req_read_valid[2] = dcache_uc_read_valid_i, + mem_req_read[2] = dcache_uc_read_i; + + hpdcache_mem_req_read_arbiter #( + .N (3), + .hpdcache_mem_req_t (hpdcache_mem_req_t) + ) i_mem_req_read_arbiter ( + .clk_i, + .rst_ni, + + .mem_req_read_ready_o (mem_req_read_ready), + .mem_req_read_valid_i (mem_req_read_valid), + .mem_req_read_i (mem_req_read), + + .mem_req_read_ready_i (mem_req_read_ready_arb), + .mem_req_read_valid_o (mem_req_read_valid_arb), + .mem_req_read_o (mem_req_read_arb) + ); + // }}} + + // Read response demultiplexor + // {{{ + logic mem_resp_read_ready; + logic mem_resp_read_valid; + hpdcache_mem_resp_r_t mem_resp_read; + + logic mem_resp_read_ready_arb [2:0]; + logic mem_resp_read_valid_arb [2:0]; + hpdcache_mem_resp_r_t mem_resp_read_arb [2:0]; + + mem_resp_rt_t mem_resp_read_rt; + + always_comb + begin + for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin + mem_resp_read_rt[i] = (i == int'( icache_miss_id_i)) ? 0 : + (i == int'(dcache_uc_read_id_i)) ? 
2 : 1; + end + end + + hpdcache_mem_resp_demux #( + .N (3), + .resp_t (hpdcache_mem_resp_r_t), + .resp_id_t (hpdcache_mem_id_t) + ) i_mem_resp_read_demux ( + .clk_i, + .rst_ni, + + .mem_resp_ready_o (mem_resp_read_ready), + .mem_resp_valid_i (mem_resp_read_valid), + .mem_resp_id_i (mem_resp_read.mem_resp_r_id), + .mem_resp_i (mem_resp_read), + + .mem_resp_ready_i (mem_resp_read_ready_arb), + .mem_resp_valid_o (mem_resp_read_valid_arb), + .mem_resp_o (mem_resp_read_arb), + + .mem_resp_rt_i (mem_resp_read_rt) + ); + + assign icache_miss_resp_w = mem_resp_read_valid_arb[0], + icache_miss_resp_wdata = mem_resp_read_arb[0], + mem_resp_read_ready_arb[0] = icache_miss_resp_wok; + + assign dcache_miss_resp_valid_o = mem_resp_read_valid_arb[1], + dcache_miss_resp_o = mem_resp_read_arb[1], + mem_resp_read_ready_arb[1] = dcache_miss_resp_ready_i; + + assign dcache_uc_read_resp_valid_o = mem_resp_read_valid_arb[2], + dcache_uc_read_resp_o = mem_resp_read_arb[2], + mem_resp_read_ready_arb[2] = dcache_uc_read_resp_ready_i; + // }}} + + // Write request arbiter + // {{{ + logic mem_req_write_ready [1:0]; + logic mem_req_write_valid [1:0]; + hpdcache_mem_req_t mem_req_write [1:0]; + + logic mem_req_write_data_ready [1:0]; + logic mem_req_write_data_valid [1:0]; + hpdcache_mem_req_w_t mem_req_write_data [1:0]; + + logic mem_req_write_ready_arb; + logic mem_req_write_valid_arb; + hpdcache_mem_req_t mem_req_write_arb; + + logic mem_req_write_data_ready_arb; + logic mem_req_write_data_valid_arb; + hpdcache_mem_req_w_t mem_req_write_data_arb; + + assign dcache_wbuf_ready_o = mem_req_write_ready[0], + mem_req_write_valid[0] = dcache_wbuf_valid_i, + mem_req_write[0] = dcache_wbuf_i; + + assign dcache_wbuf_data_ready_o = mem_req_write_data_ready[0], + mem_req_write_data_valid[0] = dcache_wbuf_data_valid_i, + mem_req_write_data[0] = dcache_wbuf_data_i; + + assign dcache_uc_write_ready_o = mem_req_write_ready[1], + mem_req_write_valid[1] = dcache_uc_write_valid_i, + mem_req_write[1] = dcache_uc_write_i; + + assign dcache_uc_write_data_ready_o = mem_req_write_data_ready[1], + mem_req_write_data_valid[1] = dcache_uc_write_data_valid_i, + mem_req_write_data[1] = dcache_uc_write_data_i; + + hpdcache_mem_req_write_arbiter #( + .N (2), + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t) + ) i_mem_req_write_arbiter ( + .clk_i, + .rst_ni, + + .mem_req_write_ready_o (mem_req_write_ready), + .mem_req_write_valid_i (mem_req_write_valid), + .mem_req_write_i (mem_req_write), + + .mem_req_write_data_ready_o (mem_req_write_data_ready), + .mem_req_write_data_valid_i (mem_req_write_data_valid), + .mem_req_write_data_i (mem_req_write_data), + + .mem_req_write_ready_i (mem_req_write_ready_arb), + .mem_req_write_valid_o (mem_req_write_valid_arb), + .mem_req_write_o (mem_req_write_arb), + + .mem_req_write_data_ready_i (mem_req_write_data_ready_arb), + .mem_req_write_data_valid_o (mem_req_write_data_valid_arb), + .mem_req_write_data_o (mem_req_write_data_arb) + ); + // }}} + + // Write response demultiplexor + // {{{ + logic mem_resp_write_ready; + logic mem_resp_write_valid; + hpdcache_mem_resp_w_t mem_resp_write; + + logic mem_resp_write_ready_arb [1:0]; + logic mem_resp_write_valid_arb [1:0]; + hpdcache_mem_resp_w_t mem_resp_write_arb [1:0]; + + mem_resp_rt_t mem_resp_write_rt; + + always_comb + begin + for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin + mem_resp_write_rt[i] = (i == int'(dcache_uc_write_id_i)) ? 
1 : 0; + end + end + + hpdcache_mem_resp_demux #( + .N (2), + .resp_t (hpdcache_mem_resp_w_t), + .resp_id_t (hpdcache_mem_id_t) + ) i_hpdcache_mem_resp_write_demux ( + .clk_i, + .rst_ni, + + .mem_resp_ready_o (mem_resp_write_ready), + .mem_resp_valid_i (mem_resp_write_valid), + .mem_resp_id_i (mem_resp_write.mem_resp_w_id), + .mem_resp_i (mem_resp_write), + + .mem_resp_ready_i (mem_resp_write_ready_arb), + .mem_resp_valid_o (mem_resp_write_valid_arb), + .mem_resp_o (mem_resp_write_arb), + + .mem_resp_rt_i (mem_resp_write_rt) + ); + + assign dcache_wbuf_resp_valid_o = mem_resp_write_valid_arb[0], + dcache_wbuf_resp_o = mem_resp_write_arb[0], + mem_resp_write_ready_arb[0] = dcache_wbuf_resp_ready_i; + + assign dcache_uc_write_resp_valid_o = mem_resp_write_valid_arb[1], + dcache_uc_write_resp_o = mem_resp_write_arb[1], + mem_resp_write_ready_arb[1] = dcache_uc_write_resp_ready_i; + // }}} + + // I$ miss pending + // {{{ + always_ff @(posedge clk_i or negedge rst_ni) + begin : icache_miss_pending_ff + if (!rst_ni) begin + icache_miss_pending_q <= 1'b0; + end else begin + icache_miss_pending_q <= ( (icache_miss_req_rok & mem_req_read_ready[0]) & ~icache_miss_pending_q) | + (~(icache_miss_req_r & icache_miss_req_rok) & icache_miss_pending_q); + end + end + // }}} + + // AXI adapters + // {{{ + axi_req_t axi_req; + axi_rsp_t axi_resp; + + hpdcache_mem_to_axi_write #( + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), + .hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t), + .aw_chan_t (axi_aw_chan_t), + .w_chan_t (axi_w_chan_t), + .b_chan_t (axi_b_chan_t) + ) i_hpdcache_mem_to_axi_write ( + .req_ready_o (mem_req_write_ready_arb), + .req_valid_i (mem_req_write_valid_arb), + .req_i (mem_req_write_arb), + + .req_data_ready_o (mem_req_write_data_ready_arb), + .req_data_valid_i (mem_req_write_data_valid_arb), + .req_data_i (mem_req_write_data_arb), + + .resp_ready_i (mem_resp_write_ready), + .resp_valid_o (mem_resp_write_valid), + .resp_o (mem_resp_write), + + .axi_aw_valid_o (axi_req.aw_valid), + .axi_aw_o (axi_req.aw), + .axi_aw_ready_i (axi_resp.aw_ready), + + .axi_w_valid_o (axi_req.w_valid), + .axi_w_o (axi_req.w), + .axi_w_ready_i (axi_resp.w_ready), + + .axi_b_valid_i (axi_resp.b_valid), + .axi_b_i (axi_resp.b), + .axi_b_ready_o (axi_req.b_ready) + ); + + hpdcache_mem_to_axi_read #( + .hpdcache_mem_req_t (hpdcache_mem_req_t), + .hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t), + .ar_chan_t (axi_ar_chan_t), + .r_chan_t (axi_r_chan_t) + ) i_hpdcache_mem_to_axi_read ( + .req_ready_o (mem_req_read_ready_arb), + .req_valid_i (mem_req_read_valid_arb), + .req_i (mem_req_read_arb), + + .resp_ready_i (mem_resp_read_ready), + .resp_valid_o (mem_resp_read_valid), + .resp_o (mem_resp_read), + + .axi_ar_valid_o (axi_req.ar_valid), + .axi_ar_o (axi_req.ar), + .axi_ar_ready_i (axi_resp.ar_ready), + + .axi_r_valid_i (axi_resp.r_valid), + .axi_r_i (axi_resp.r), + .axi_r_ready_o (axi_req.r_ready) + ); + + assign axi_req_o = axi_req; + assign axi_resp = axi_resp_i; + // }}} + + // Assertions + // {{{ + // pragma translate_off + initial assert (HPDcacheMemIdWidth <= AxiIdWidth) else + $fatal("HPDcacheMemIdWidth shall be less or equal to AxiIdWidth"); + initial assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_MSHR_SET_WIDTH + hpdcache_pkg::HPDCACHE_MSHR_WAY_WIDTH + 1)) else + $fatal("HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache misses and Icache misses"); + initial assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_WBUF_DIR_PTR_WIDTH + 1)) else + 
$fatal("HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes"); + initial assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) else + $fatal("HPDcacheMemDataWidth shall be less or equal to the width of a Icache line"); + initial assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH) else + $fatal("HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line"); + // pragma translate_on + // }}} + +endmodule : cva6_hpdcache_subsystem_axi_arbiter diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/target/generic/hpdcache_params_pkg.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/target/generic/hpdcache_params_pkg.sv new file mode 100644 index 00000000000..92f8d1de5ca --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/target/generic/hpdcache_params_pkg.sv @@ -0,0 +1,180 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2023 + * Description : Generic parameters package for the HPDcache. All parameters + * can be overriden by Verilog preprocessor definitions. 
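+ *                  For example, a 64-set configuration can typically be
+ *                  selected at compile time with a tool define such as
+ *                  +define+CONF_HPDCACHE_SETS=64 (VCS/Questa/Xcelium) or
+ *                  -DCONF_HPDCACHE_SETS=64 (Verilator), without editing
+ *                  this file.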
+ * History : + */ +package hpdcache_params_pkg; + // Definition of global constants for the HPDcache data and directory + // {{{ + `ifndef CONF_HPDCACHE_PA_WIDTH + `define CONF_HPDCACHE_PA_WIDTH 49 + `endif + localparam int unsigned PARAM_PA_WIDTH = `CONF_HPDCACHE_PA_WIDTH; + + // HPDcache number of sets + `ifndef CONF_HPDCACHE_SETS + `define CONF_HPDCACHE_SETS 128 + `endif + localparam int unsigned PARAM_SETS = `CONF_HPDCACHE_SETS; + + // HPDcache number of ways + `ifndef CONF_HPDCACHE_WAYS + `define CONF_HPDCACHE_WAYS 4 + `endif + localparam int unsigned PARAM_WAYS = `CONF_HPDCACHE_WAYS; + + // HPDcache word width (bits) + `ifndef CONF_HPDCACHE_WORD_WIDTH + `define CONF_HPDCACHE_WORD_WIDTH 64 + `endif + localparam int unsigned PARAM_WORD_WIDTH = `CONF_HPDCACHE_WORD_WIDTH; + + // HPDcache cache-line width (bits) + `ifndef CONF_HPDCACHE_CL_WORDS + `define CONF_HPDCACHE_CL_WORDS 8 + `endif + localparam int unsigned PARAM_CL_WORDS = `CONF_HPDCACHE_CL_WORDS; + + // HPDcache number of words in the request data channels (request and response) + `ifndef CONF_HPDCACHE_REQ_WORDS + `define CONF_HPDCACHE_REQ_WORDS 1 + `endif + localparam int unsigned PARAM_REQ_WORDS = `CONF_HPDCACHE_REQ_WORDS; + + // HPDcache request transaction ID width (bits) + `ifndef CONF_HPDCACHE_REQ_TRANS_ID_WIDTH + `define CONF_HPDCACHE_REQ_TRANS_ID_WIDTH 7 + `endif + localparam int unsigned PARAM_REQ_TRANS_ID_WIDTH = `CONF_HPDCACHE_REQ_TRANS_ID_WIDTH; + + // HPDcache request source ID width (bits) + `ifndef CONF_HPDCACHE_REQ_SRC_ID_WIDTH + `define CONF_HPDCACHE_REQ_SRC_ID_WIDTH 3 + `endif + localparam int unsigned PARAM_REQ_SRC_ID_WIDTH = `CONF_HPDCACHE_REQ_SRC_ID_WIDTH; + + // HPDcache physically indexed + `ifndef CONF_HPDCACHE_PHYSICALLY_INDEXED + `define CONF_HPDCACHE_PHYSICALLY_INDEXED 1'b0 + `endif + localparam bit PARAM_PHYSICALLY_INDEXED = `CONF_HPDCACHE_PHYSICALLY_INDEXED; + // }}} + + // Definition of constants and types for HPDcache data memory + // {{{ + `ifndef CONF_HPDCACHE_DATA_WAYS_PER_RAM_WORD + `define CONF_HPDCACHE_DATA_WAYS_PER_RAM_WORD 2 + `endif + localparam int unsigned PARAM_DATA_WAYS_PER_RAM_WORD = `CONF_HPDCACHE_DATA_WAYS_PER_RAM_WORD; + + `ifndef CONF_HPDCACHE_DATA_SETS_PER_RAM + `define CONF_HPDCACHE_DATA_SETS_PER_RAM PARAM_SETS + `endif + localparam int unsigned PARAM_DATA_SETS_PER_RAM = `CONF_HPDCACHE_DATA_SETS_PER_RAM; + + // HPDcache DATA RAM implements write byte enable + `ifndef CONF_HPDCACHE_DATA_RAM_WBYTEENABLE + `define CONF_HPDCACHE_DATA_RAM_WBYTEENABLE 0 + `endif + localparam bit PARAM_DATA_RAM_WBYTEENABLE = `CONF_HPDCACHE_DATA_RAM_WBYTEENABLE; + + // Define the number of memory contiguous words that can be accessed + // simultaneously from the cache. 
+ // - This limits the maximum width for the data channel from requesters + // - This impacts the refill latency + `ifndef CONF_HPDCACHE_ACCESS_WORDS + `define CONF_HPDCACHE_ACCESS_WORDS 4 + `endif + localparam int unsigned PARAM_ACCESS_WORDS = `CONF_HPDCACHE_ACCESS_WORDS; + // }}} + + // Definition of constants and types for the Miss Status Holding Register (MSHR) + // {{{ + `ifndef CONF_HPDCACHE_MSHR_SETS + `define CONF_HPDCACHE_MSHR_SETS 64 + `endif + localparam int unsigned PARAM_MSHR_SETS = `CONF_HPDCACHE_MSHR_SETS; + + // HPDcache MSHR number of ways + `ifndef CONF_HPDCACHE_MSHR_WAYS + `define CONF_HPDCACHE_MSHR_WAYS 2 + `endif + localparam int unsigned PARAM_MSHR_WAYS = `CONF_HPDCACHE_MSHR_WAYS; + + // HPDcache MSHR number of ways in the same SRAM word + `ifndef CONF_HPDCACHE_MSHR_WAYS_PER_RAM_WORD + `define CONF_HPDCACHE_MSHR_WAYS_PER_RAM_WORD 2 + `endif + localparam int unsigned PARAM_MSHR_WAYS_PER_RAM_WORD = `CONF_HPDCACHE_MSHR_WAYS_PER_RAM_WORD; + + // HPDcache MSHR number of sets in the same SRAM + `ifndef CONF_HPDCACHE_MSHR_SETS_PER_RAM + `define CONF_HPDCACHE_MSHR_SETS_PER_RAM PARAM_MSHR_SETS + `endif + localparam int unsigned PARAM_MSHR_SETS_PER_RAM = `CONF_HPDCACHE_MSHR_SETS_PER_RAM; + + // HPDcache MSHR implements write byte enable + `ifndef CONF_HPDCACHE_MSHR_RAM_WBYTEENABLE + `define CONF_HPDCACHE_MSHR_RAM_WBYTEENABLE 0 + `endif + localparam bit PARAM_MSHR_RAM_WBYTEENABLE = `CONF_HPDCACHE_MSHR_RAM_WBYTEENABLE; + + `ifndef CONF_HPDCACHE_MSHR_USE_REGBANK + `define CONF_HPDCACHE_MSHR_USE_REGBANK 0 + `endif + localparam bit PARAM_MSHR_USE_REGBANK = `CONF_HPDCACHE_MSHR_USE_REGBANK; + // }}} + + // Definition of constants and types for the Write Buffer (WBUF) + // {{{ + `ifndef CONF_HPDCACHE_WBUF_DIR_ENTRIES + `define CONF_HPDCACHE_WBUF_DIR_ENTRIES 16 + `endif + localparam int unsigned PARAM_WBUF_DIR_ENTRIES = `CONF_HPDCACHE_WBUF_DIR_ENTRIES; + + `ifndef CONF_HPDCACHE_WBUF_DATA_ENTRIES + `define CONF_HPDCACHE_WBUF_DATA_ENTRIES 4 + `endif + localparam int unsigned PARAM_WBUF_DATA_ENTRIES = `CONF_HPDCACHE_WBUF_DATA_ENTRIES; + + `ifndef CONF_HPDCACHE_WBUF_WORDS + `define CONF_HPDCACHE_WBUF_WORDS PARAM_REQ_WORDS + `endif + localparam int unsigned PARAM_WBUF_WORDS = `CONF_HPDCACHE_WBUF_WORDS; + + `ifndef CONF_HPDCACHE_WBUF_TIMECNT_WIDTH + `define CONF_HPDCACHE_WBUF_TIMECNT_WIDTH 4 + `endif + localparam int unsigned PARAM_WBUF_TIMECNT_WIDTH = `CONF_HPDCACHE_WBUF_TIMECNT_WIDTH; + // }}} + + // Definition of constants and types for the Replay Table (RTAB) + // {{{ + `ifndef CONF_HPDCACHE_RTAB_ENTRIES + `define CONF_HPDCACHE_RTAB_ENTRIES 8 + `endif + localparam int PARAM_RTAB_ENTRIES = `CONF_HPDCACHE_RTAB_ENTRIES; + // }}} + +endpackage diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv new file mode 100644 index 00000000000..cb32acf57a8 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_req_read_arbiter.sv @@ -0,0 +1,103 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. 
You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Dcache Memory Read Request Channel Arbiter + * History : + */ +module hpdcache_mem_req_read_arbiter +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter hpdcache_uint N = 0, + parameter type hpdcache_mem_req_t = logic +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + output logic mem_req_read_ready_o [N-1:0], + input logic mem_req_read_valid_i [N-1:0], + input hpdcache_mem_req_t mem_req_read_i [N-1:0], + + input logic mem_req_read_ready_i, + output logic mem_req_read_valid_o, + output hpdcache_mem_req_t mem_req_read_o +); +// }}} + + logic [N-1:0] mem_read_arb_req_valid; + hpdcache_mem_req_t [N-1:0] mem_read_arb_req; + logic [N-1:0] mem_read_arb_req_gnt; + + logic req_valid; + + genvar gen_i; + + + // Pack inputs + generate + for (gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_inputs_gen + assign mem_read_arb_req_valid[gen_i] = mem_req_read_valid_i[gen_i], + mem_read_arb_req [gen_i] = mem_req_read_i[gen_i]; + end + endgenerate + + assign req_valid = |(mem_read_arb_req_gnt & mem_read_arb_req_valid); + + // Fixed-priority arbiter + hpdcache_fxarb #( + .N (N) + ) hpdcache_fxarb_mem_req_write_i ( + .clk_i, + .rst_ni, + .req_i (mem_read_arb_req_valid), + .gnt_o (mem_read_arb_req_gnt), + .ready_i (mem_req_read_ready_i) + ); + + // Demultiplexor for the ready signal + generate + for (gen_i = 0; gen_i < int'(N); gen_i++) begin : req_ready_gen + assign mem_req_read_ready_o[gen_i] = mem_req_read_ready_i & + mem_read_arb_req_gnt[gen_i] & mem_read_arb_req_valid[gen_i]; + end + endgenerate + + assign mem_req_read_valid_o = req_valid; + + // Multiplexor for requests + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH ($bits(hpdcache_mem_req_t)), + .ONE_HOT_SEL (1'b1) + ) mem_read_req_mux_i ( + .data_i (mem_read_arb_req), + .sel_i (mem_read_arb_req_gnt), + .data_o (mem_req_read_o) + ); + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv new file mode 100644 index 00000000000..a7916eca7e2 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_req_write_arbiter.sv @@ -0,0 +1,193 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Dcache Memory Write Channels Arbiter + * History : + */ +module hpdcache_mem_req_write_arbiter +import hpdcache_pkg::*; +// Parameters +// {{{ +#( + parameter hpdcache_uint N = 0, + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + output logic mem_req_write_ready_o [N-1:0], + input logic mem_req_write_valid_i [N-1:0], + input hpdcache_mem_req_t mem_req_write_i [N-1:0], + + output logic mem_req_write_data_ready_o [N-1:0], + input logic mem_req_write_data_valid_i [N-1:0], + input hpdcache_mem_req_w_t mem_req_write_data_i [N-1:0], + + input logic mem_req_write_ready_i, + output logic mem_req_write_valid_o, + output hpdcache_mem_req_t mem_req_write_o, + + input logic mem_req_write_data_ready_i, + output logic mem_req_write_data_valid_o, + output hpdcache_mem_req_w_t mem_req_write_data_o +); +// }}} + + typedef enum { + REQ_IDLE, + REQ_META_SENT, + REQ_DATA_SENT + } req_send_fsm_t; + + req_send_fsm_t req_send_fsm_q, req_send_fsm_d; + logic req_valid; + logic req_data_valid; + + logic [N-1:0] mem_write_arb_req_valid; + hpdcache_mem_req_t [N-1:0] mem_write_arb_req; + logic [N-1:0] mem_write_arb_req_data_valid; + hpdcache_mem_req_w_t [N-1:0] mem_write_arb_req_data; + logic [N-1:0] mem_write_arb_req_gnt; + logic mem_write_arb_req_ready; + + genvar gen_i; + + + generate + for (gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_inputs_gen + assign mem_write_arb_req_valid [gen_i] = mem_req_write_valid_i[gen_i], + mem_write_arb_req [gen_i] = mem_req_write_i[gen_i], + mem_write_arb_req_data_valid[gen_i] = mem_req_write_data_valid_i[gen_i], + mem_write_arb_req_data [gen_i] = mem_req_write_data_i[gen_i]; + end + endgenerate + + // Fixed-priority arbiter + hpdcache_fxarb #( + .N (2) + ) hpdcache_fxarb_mem_req_write_i ( + .clk_i, + .rst_ni, + .req_i (mem_write_arb_req_valid), + .gnt_o (mem_write_arb_req_gnt), + .ready_i (mem_write_arb_req_ready) + ); + + assign req_valid = |(mem_write_arb_req_gnt & mem_write_arb_req_valid); + assign req_data_valid = |(mem_write_arb_req_gnt & mem_write_arb_req_data_valid); + + // Request sent FSM + // + // This FSM allows to make sure that the request and its corresponding + // data are sent in order. This is, when a requester sends a request, this + // FSM keeps the grant signal on this requester until it has sent the + // corresponding data. 
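+    //  In other words: when the request and its data beat are both presented
+    //  and accepted in the same cycle, the FSM stays in REQ_IDLE and the
+    //  grant can be released; when the request is accepted but the presented
+    //  data beat is not yet accepted, the FSM goes to REQ_META_SENT and masks
+    //  the request valid until the data beat is accepted; when the data beat
+    //  is accepted first, it goes to REQ_DATA_SENT and waits for the request
+    //  to be accepted.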
+ // + // {{{ + always_comb + begin : req_send_fsm_comb + req_send_fsm_d = req_send_fsm_q; + mem_write_arb_req_ready = 1'b0; + case (req_send_fsm_q) + REQ_IDLE: + if (req_valid && mem_req_write_ready_i) begin + if (req_data_valid) begin + if (mem_req_write_data_ready_i) begin + mem_write_arb_req_ready = 1'b1; + req_send_fsm_d = REQ_IDLE; + end else begin + req_send_fsm_d = REQ_META_SENT; + end + end + end else if (req_data_valid && mem_req_write_data_ready_i) begin + req_send_fsm_d = REQ_DATA_SENT; + end + + REQ_META_SENT: + if (req_data_valid && mem_req_write_data_ready_i) begin + mem_write_arb_req_ready = 1'b1; + req_send_fsm_d = REQ_IDLE; + end + + REQ_DATA_SENT: + if (req_valid && mem_req_write_ready_i) begin + mem_write_arb_req_ready = 1'b1; + req_send_fsm_d = REQ_IDLE; + end + endcase + end + + always_ff @(posedge clk_i or negedge rst_ni) + begin : req_send_fsm_ff + if (!rst_ni) begin + req_send_fsm_q <= REQ_IDLE; + end else begin + req_send_fsm_q <= req_send_fsm_d; + end + end + // }}} + + generate + for (gen_i = 0; gen_i < int'(N); gen_i++) begin : req_ready_gen + assign mem_req_write_ready_o[gen_i] = + (mem_write_arb_req_gnt[gen_i] & mem_req_write_ready_i) & + (req_send_fsm_q != REQ_META_SENT); + + assign mem_req_write_data_ready_o[gen_i] = + (mem_write_arb_req_gnt[gen_i] & mem_req_write_data_ready_i) & + (req_send_fsm_q != REQ_DATA_SENT); + end + endgenerate + + // Output assignments + // {{{ + assign mem_req_write_valid_o = req_valid & (req_send_fsm_q != REQ_META_SENT); + assign mem_req_write_data_valid_o = req_data_valid & (req_send_fsm_q != REQ_DATA_SENT); + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH ($bits(hpdcache_mem_req_t)), + .ONE_HOT_SEL (1'b1) + ) mem_write_req_mux_i ( + .data_i (mem_write_arb_req), + .sel_i (mem_write_arb_req_gnt), + .data_o (mem_req_write_o) + ); + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH ($bits(hpdcache_mem_req_w_t)), + .ONE_HOT_SEL (1'b1) + ) mem_write_data_req_mux_i ( + .data_i (mem_write_arb_req_data), + .sel_i (mem_write_arb_req_gnt), + .data_o (mem_req_write_data_o) + ); + // }}} + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv new file mode 100644 index 00000000000..c1502a985bb --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_resp_demux.sv @@ -0,0 +1,108 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : June, 2022 + * Description : Dcache Memory Reponse Demultiplexer + * History : + */ +module hpdcache_mem_resp_demux +// Parameters +// {{{ +#( + parameter int N = 0, + parameter type resp_t = logic, + parameter type resp_id_t = logic, + + localparam int RT_DEPTH = (1 << $bits(resp_id_t)), + localparam type rt_t = resp_id_t [RT_DEPTH-1:0] +) +// }}} + +// Ports +// {{{ +( + input logic clk_i, + input logic rst_ni, + + output logic mem_resp_ready_o, + input logic mem_resp_valid_i, + input resp_id_t mem_resp_id_i, + input resp_t mem_resp_i, + + input logic mem_resp_ready_i [N-1:0], + output logic mem_resp_valid_o [N-1:0], + output resp_t mem_resp_o [N-1:0], + + input rt_t mem_resp_rt_i +); +// }}} + + typedef logic [$clog2(N)-1:0] sel_t; + + logic [N-1:0] mem_resp_demux_valid; + resp_t [N-1:0] mem_resp_demux; + logic [N-1:0] mem_resp_demux_ready; + sel_t mem_resp_demux_sel; + + // Route the response according to the response ID and the routing table + assign mem_resp_demux_sel = mem_resp_rt_i[int'(mem_resp_id_i)]; + + // Forward the response to the corresponding output port + hpdcache_demux #( + .NOUTPUT (N), + .DATA_WIDTH (1), + .ONE_HOT_SEL (0) + ) i_resp_valid_demux ( + .data_i (mem_resp_valid_i), + .sel_i (mem_resp_demux_sel), + .data_o (mem_resp_demux_valid) + ); + + hpdcache_demux #( + .NOUTPUT (N), + .DATA_WIDTH ($bits(resp_t)), + .ONE_HOT_SEL (0) + ) i_resp_demux ( + .data_i (mem_resp_i), + .sel_i (mem_resp_demux_sel), + .data_o (mem_resp_demux) + ); + + hpdcache_mux #( + .NINPUT (N), + .DATA_WIDTH (1), + .ONE_HOT_SEL (0) + ) i_resp_ready_mux ( + .data_i (mem_resp_demux_ready), + .sel_i (mem_resp_demux_sel), + .data_o (mem_resp_ready_o) + ); + + // Pack/unpack responses + generate + for (genvar gen_i = 0; gen_i < int'(N); gen_i++) begin : pack_unpack_resp_gen + assign mem_resp_valid_o [gen_i] = mem_resp_demux_valid [gen_i]; + assign mem_resp_o [gen_i] = mem_resp_demux [gen_i]; + assign mem_resp_demux_ready [gen_i] = mem_resp_ready_i [gen_i]; + end + endgenerate + +endmodule : hpdcache_mem_resp_demux diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv new file mode 100644 index 00000000000..ec3fad741f7 --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_to_axi_read.sv @@ -0,0 +1,95 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Dcache memory request to axi read channels + * History : + */ +module hpdcache_mem_to_axi_read +import hpdcache_pkg::*; +#( + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_resp_r_t = logic, + parameter type ar_chan_t = logic, + parameter type r_chan_t = logic +) +( + output logic req_ready_o, + input logic req_valid_i, + input hpdcache_mem_req_t req_i, + + input logic resp_ready_i, + output logic resp_valid_o, + output hpdcache_mem_resp_r_t resp_o, + + output logic axi_ar_valid_o, + output ar_chan_t axi_ar_o, + input logic axi_ar_ready_i, + + input logic axi_r_valid_i, + input r_chan_t axi_r_i, + output logic axi_r_ready_o +); + + logic lock; + axi_pkg::cache_t cache; + hpdcache_mem_error_e resp; + + assign lock = (req_i.mem_req_command == HPDCACHE_MEM_ATOMIC) && + (req_i.mem_req_atomic == HPDCACHE_MEM_ATOMIC_LDEX); + + assign cache = req_i.mem_req_cacheable ? + axi_pkg::CACHE_BUFFERABLE | + axi_pkg::CACHE_MODIFIABLE | + axi_pkg::CACHE_RD_ALLOC | + axi_pkg::CACHE_WR_ALLOC : '0; + + always_comb + begin : resp_decode_comb + case (axi_r_i.resp) + axi_pkg::RESP_SLVERR, + axi_pkg::RESP_DECERR: resp = HPDCACHE_MEM_RESP_NOK; + default: resp = HPDCACHE_MEM_RESP_OK; + endcase + end + + assign req_ready_o = axi_ar_ready_i, + axi_ar_valid_o = req_valid_i, + axi_ar_o.id = req_i.mem_req_id, + axi_ar_o.addr = req_i.mem_req_addr, + axi_ar_o.len = req_i.mem_req_len, + axi_ar_o.size = req_i.mem_req_size, + axi_ar_o.burst = axi_pkg::BURST_INCR, + axi_ar_o.lock = lock, + axi_ar_o.cache = cache, + axi_ar_o.prot = '0, + axi_ar_o.qos = '0, + axi_ar_o.region = '0, + axi_ar_o.user = '0; + + assign axi_r_ready_o = resp_ready_i, + resp_valid_o = axi_r_valid_i, + resp_o.mem_resp_r_error = resp, + resp_o.mem_resp_r_id = axi_r_i.id, + resp_o.mem_resp_r_data = axi_r_i.data, + resp_o.mem_resp_r_last = axi_r_i.last; + +endmodule diff --git a/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv b/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv new file mode 100644 index 00000000000..8d8eb9f6d8c --- /dev/null +++ b/vendor/openhwgroup/cvhpdcache/rtl/src/utils/hpdcache_mem_to_axi_write.sv @@ -0,0 +1,148 @@ +/* + * Copyright 2023 CEA* + * *Commissariat a l'Energie Atomique et aux Energies Alternatives (CEA) + * + * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + * + * Licensed under the Solderpad Hardware License v 2.1 (the “License”); you + * may not use this file except in compliance with the License, or, at your + * option, the Apache License version 2.0. You may obtain a copy of the + * License at + * + * https://solderpad.org/licenses/SHL-2.1/ + * + * Unless required by applicable law or agreed to in writing, any work + * distributed under the License is distributed on an “AS IS” BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Authors : Cesar Fuguet + * Creation Date : April, 2021 + * Description : Dcache memory request to axi write channels + * History : + */ +module hpdcache_mem_to_axi_write +import hpdcache_pkg::*; +#( + parameter type hpdcache_mem_req_t = logic, + parameter type hpdcache_mem_req_w_t = logic, + parameter type hpdcache_mem_resp_w_t = logic, + parameter type aw_chan_t = logic, + parameter type w_chan_t = logic, + parameter type b_chan_t = logic +) +( + output logic req_ready_o, + input logic req_valid_i, + input hpdcache_mem_req_t req_i, + + output logic req_data_ready_o, + input logic req_data_valid_i, + input hpdcache_mem_req_w_t req_data_i, + + input logic resp_ready_i, + output logic resp_valid_o, + output hpdcache_mem_resp_w_t resp_o, + + output logic axi_aw_valid_o, + output aw_chan_t axi_aw_o, + input logic axi_aw_ready_i, + + output logic axi_w_valid_o, + output w_chan_t axi_w_o, + input logic axi_w_ready_i, + + input logic axi_b_valid_i, + input b_chan_t axi_b_i, + output logic axi_b_ready_o +); + + logic lock; + axi_pkg::atop_t atop; + axi_pkg::cache_t cache; + hpdcache_mem_error_e resp; + + always_comb + begin : atop_comb + lock = 1'b0; + atop = '0; + case (req_i.mem_req_command) + HPDCACHE_MEM_ATOMIC: begin + case (req_i.mem_req_atomic) + HPDCACHE_MEM_ATOMIC_STEX: lock = 1'b1; + HPDCACHE_MEM_ATOMIC_ADD : atop = {axi_pkg::ATOP_ATOMICLOAD, + axi_pkg::ATOP_LITTLE_END, + axi_pkg::ATOP_ADD}; + HPDCACHE_MEM_ATOMIC_CLR : atop = {axi_pkg::ATOP_ATOMICLOAD, + axi_pkg::ATOP_LITTLE_END, + axi_pkg::ATOP_CLR}; + HPDCACHE_MEM_ATOMIC_SET : atop = {axi_pkg::ATOP_ATOMICLOAD, + axi_pkg::ATOP_LITTLE_END, + axi_pkg::ATOP_SET}; + HPDCACHE_MEM_ATOMIC_EOR : atop = {axi_pkg::ATOP_ATOMICLOAD, + axi_pkg::ATOP_LITTLE_END, + axi_pkg::ATOP_EOR}; + HPDCACHE_MEM_ATOMIC_SMAX: atop = {axi_pkg::ATOP_ATOMICLOAD, + axi_pkg::ATOP_LITTLE_END, + axi_pkg::ATOP_SMAX}; + HPDCACHE_MEM_ATOMIC_SMIN: atop = {axi_pkg::ATOP_ATOMICLOAD, + axi_pkg::ATOP_LITTLE_END, + axi_pkg::ATOP_SMIN}; + HPDCACHE_MEM_ATOMIC_UMAX: atop = {axi_pkg::ATOP_ATOMICLOAD, + axi_pkg::ATOP_LITTLE_END, + axi_pkg::ATOP_UMAX}; + HPDCACHE_MEM_ATOMIC_UMIN: atop = {axi_pkg::ATOP_ATOMICLOAD, + axi_pkg::ATOP_LITTLE_END, + axi_pkg::ATOP_UMIN}; + HPDCACHE_MEM_ATOMIC_SWAP: atop = axi_pkg::ATOP_ATOMICSWAP; + endcase + end + endcase + end + + assign cache = (req_i.mem_req_cacheable && !lock) ? 
+ axi_pkg::CACHE_BUFFERABLE | + axi_pkg::CACHE_MODIFIABLE | + axi_pkg::CACHE_RD_ALLOC | + axi_pkg::CACHE_WR_ALLOC : '0; + + always_comb + begin : resp_decode_comb + case (axi_b_i.resp) + axi_pkg::RESP_SLVERR, + axi_pkg::RESP_DECERR: resp = HPDCACHE_MEM_RESP_NOK; + default: resp = HPDCACHE_MEM_RESP_OK; + endcase + end + + assign req_ready_o = axi_aw_ready_i, + axi_aw_valid_o = req_valid_i, + axi_aw_o.id = req_i.mem_req_id, + axi_aw_o.addr = req_i.mem_req_addr, + axi_aw_o.len = req_i.mem_req_len, + axi_aw_o.size = req_i.mem_req_size, + axi_aw_o.burst = axi_pkg::BURST_INCR, + axi_aw_o.lock = lock, + axi_aw_o.cache = cache, + axi_aw_o.prot = '0, + axi_aw_o.qos = '0, + axi_aw_o.region = '0, + axi_aw_o.atop = atop, + axi_aw_o.user = '0; + + assign req_data_ready_o = axi_w_ready_i, + axi_w_valid_o = req_data_valid_i, + axi_w_o.data = req_data_i.mem_req_w_data, + axi_w_o.strb = req_data_i.mem_req_w_be, + axi_w_o.last = req_data_i.mem_req_w_last, + axi_w_o.user = '0; + + assign axi_b_ready_o = resp_ready_i, + resp_valid_o = axi_b_valid_i, + resp_o.mem_resp_w_error = resp, + resp_o.mem_resp_w_id = axi_b_i.id, + resp_o.mem_resp_w_is_atomic = (axi_b_i.resp == axi_pkg::RESP_EXOKAY); + +endmodule diff --git a/vendor/openhwgroup_cvhpdcache.lock.hjson b/vendor/openhwgroup_cvhpdcache.lock.hjson new file mode 100644 index 00000000000..f2faf95734b --- /dev/null +++ b/vendor/openhwgroup_cvhpdcache.lock.hjson @@ -0,0 +1,14 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// This file is generated by the util/vendor script. Please do not modify it +// manually. + +{ + upstream: + { + url: https://github.com/openhwgroup/cv-hpdcache + rev: 0cf8e7970bba32ec4ff87ba70d7ee25044fd0382 + } +} diff --git a/vendor/openhwgroup_cvhpdcache.vendor.hjson b/vendor/openhwgroup_cvhpdcache.vendor.hjson new file mode 100644 index 00000000000..9072460164d --- /dev/null +++ b/vendor/openhwgroup_cvhpdcache.vendor.hjson @@ -0,0 +1,28 @@ +// -*- coding: utf-8 -*- +// Copyright (C) 2023 Commissariat a l'Energie Atomique et aux +// Energies Alternatives +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0. +// Original Author: Cesar Fuguet (cesar.fuguettortolero@cea.fr) + +{ + // Name of the project + name: "openhwgroup_cvhpdcache", + + // Target directory: relative to the location of this script. + target_dir: "openhwgroup/cvhpdcache", + + // Upstream repository + upstream: { + // URL + url: "https://github.com/openhwgroup/cv-hpdcache", + // revision + rev: "v3.0.0", + } + + // Patch dir for local changes + patch_dir: "patches/openhwgroup/cvhpdcache", + + // Exclusions from upstream content + exclude_from_upstream: [] +} +
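+
+// To refresh the vendored sources against upstream (assuming the lowRISC
+// util/vendor script referenced by the generated lock file is available in
+// this repository), something like the following can be run from the
+// repository root:
+//
+//   util/vendor.py vendor/openhwgroup_cvhpdcache.vendor.hjson --update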