Skip to content

Commit

Permalink
Moved vset stalling to fetch
Browse files Browse the repository at this point in the history
  • Loading branch information
aarongchan committed May 21, 2024
1 parent c098650 commit daaa41f
Show file tree
Hide file tree
Showing 9 changed files with 155 additions and 97 deletions.
2 changes: 1 addition & 1 deletion arches/big_core.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ include: medium_core.yaml

top.cpu.core0:
fetch.params.num_to_fetch: 8
decode.params.num_to_decode: 8
decode.params.num_to_decode: 3
rename.params.num_to_rename: 8
rename.params.num_integer_renames: 64
rename.params.num_float_renames: 64
Expand Down
7 changes: 5 additions & 2 deletions core/CPUTopology.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,10 @@ void olympia::CoreTopologySimple::bindTree(sparta::RootTreeNode* root_node)
pipe_target_end = stoi(iq[1]);
}
pipe_target_end++;
const std::string vset_in =
const std::string vset_in_decode =
core_node + ".decode." + "ports.in_vset_inst";
const std::string vset_in_fetch =
core_node + ".fetch." + "ports.in_vset_inst";
for (int pipe_idx = pipe_target_start; pipe_idx < pipe_target_end; ++pipe_idx)
{
std::string unit_name = "exe" + std::to_string(pipe_idx);
Expand All @@ -403,7 +405,8 @@ void olympia::CoreTopologySimple::bindTree(sparta::RootTreeNode* root_node)
// only bind execute pipe -> decode and fetch ports for an issue queue if it has a vset pipe
const std::string exe_vset_out =
core_node + ".execute." + unit_name + ".ports.out_vset";
bind_ports(vset_in, exe_vset_out);
bind_ports(vset_in_decode, exe_vset_out);
bind_ports(vset_in_fetch, exe_vset_out);
break; // break after because there should only be one vset per issue queue
}
}
Expand Down
3 changes: 0 additions & 3 deletions core/CoreTypes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ namespace olympia::core_types
N_REGFILES = RF_INVALID
};

// std::vector<core_types::RegFile> reg_files = {core_types::RF_INTEGER, core_types::RF_FLOAT,
// core_types::RF_VECTOR};

static inline const char* const regfile_names[] = {"integer", "float", "vector"};

static inline const char* const issue_queue_types[] = {"alu", "fpu", "br", "vint", "vset"};
Expand Down
149 changes: 73 additions & 76 deletions core/Decode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ namespace olympia
sparta::Unit(node),

fetch_queue_("FetchQueue", p->fetch_queue_size, node->getClock(), &unit_stat_set_),
uop_queue_("UOpQueue", p->uop_queue_size, node->getClock(), &unit_stat_set_),
fusion_num_fuse_instructions_(&unit_stat_set_, "fusion_num_fuse_instructions",
"The number of custom instructions created by fusion",
sparta::Counter::COUNT_NORMAL),
Expand Down Expand Up @@ -121,7 +122,6 @@ namespace olympia
VCSRs_.lmul = inst->getLMUL();
VCSRs_.sew = inst->getSEW();
VCSRs_.vl = inst->getVL();
waiting_on_vset_ = false;
// schedule decode, because we've been stalled on vset
ev_decode_insts_event_.schedule(sparta::Clock::Cycle(0));
}
Expand All @@ -137,7 +137,7 @@ namespace olympia
// Decode instructions
void Decode::decodeInsts_()
{
uint32_t num_decode = std::min(uop_queue_credits_, fetch_queue_.size());
uint32_t num_decode = std::min(uop_queue_credits_, fetch_queue_.size() + uop_queue_.size());
num_decode = std::min(num_decode, num_to_decode_);

// buffer to maximize the chances of a group match limited
Expand All @@ -162,98 +162,95 @@ namespace olympia
// Send instructions on their way to rename
for (uint32_t i = 0; i < num_decode; ++i)
{
const auto & inst = fetch_queue_.read(0);
// if we're waiting on a vset, but it's a scalar instruction
// we can process all scalars after the vset until we reach a vset the decode queue
if ((!waiting_on_vset_) || (waiting_on_vset_ && !inst->isVector()))
{
// we only need to stall for vset when it's
// vsetvl or a vset{i}vl{i} that has a vl that is not the default
if(inst->isVset()){

if(inst->getSourceOpInfoList()[0].field_value != 0){
// vl is being set by register, need to block
waiting_on_vset_ = true;
}
else if(inst->getSourceOpInfoList()[0].field_value == 0 && inst->getDestOpInfoList()[0].field_value != 0){
// set vl to vlmax, no need to block
VCSRs_.vl = Inst::VLMAX;
}
}
if (inst->getMnemonic() == "vsetvl")
{
// vsetvl depends on register values for VTYPE, need to wait till execution
waiting_on_vset_ = true;
if(uop_queue_.size() > 0){
const auto & inst = uop_queue_.read(0);
insts->emplace_back(inst);
inst->setStatus(Inst::Status::DECODED);
ILOG("From UOp Queue Decoded: " << inst);
uop_queue_.pop();
}
else{
const auto & inst = fetch_queue_.read(0);
// if we're waiting on a vset, but it's a scalar instruction
// we can process all scalars after the vset until we reach a vset the decode queue
if(inst->isVset() && inst->getSourceOpInfoList()[0].field_value == 0 && inst->getDestOpInfoList()[0].field_value != 0){
// set vl to vlmax, no need to block
VCSRs_.vl = Inst::VLMAX;
}
else if (inst->isVector())
if (!inst->isVset() && inst->isVector())
{
// set LMUL, VSET, VL
inst->setVCSRs(VCSRs_);
}
if (VCSRs_.lmul > 1)
if (inst->getLMUL() > 1 && !inst->isVset())
{
// lmul > 1, fracture instruction into UOps
inst->setUOp(true); // mark instruction to denote it has UOPs
if (uop_queue_credits_ - i > VCSRs_.lmul)
// turn this into a state machine
// send them out based on credit
// state indicating if we're decoding as normal or draining a UOp Queue
ILOG("Inst: " << inst << " is being split into " << VCSRs_.lmul << " UOPs");
// we can process the lmul, we subtract from uop_queue_credits_
// because num_decode is min of both fetch queue and uop_queue_credits_
// which doesn't factor in the uop amount per instruction
insts->emplace_back(inst);
inst->setStatus(Inst::Status::DECODED);
fetch_queue_.pop();
for (uint32_t j = 1; j < VCSRs_.lmul; ++j)
{
ILOG("Inst: " << inst << " is being split into " << VCSRs_.lmul << " UOPs");
// we can process the lmul, we subtract from uop_queue_credits_
// because num_decode is min of both fetch queue and uop_queue_credits_
// which doesn't factor in the uop amount per instruction
for (uint32_t j = 1; j < VCSRs_.lmul; ++j)
i++;
// we create lmul - 1 instructions, because the original instruction
// will also be executed, so we start creating UOPs at vector
// registers + 1 until LMUL
MavisType* mavis_facade_ = getMavis(getContainer());
const std::string mnemonic = inst->getMnemonic();
auto srcs = inst->getSourceOpInfoList();
// determine different modes of agnostic vs undisturbed
// parameter in simulator for setting ^
for (auto & src : srcs)
{
src.field_value += j;
}
auto dests = inst->getDestOpInfoList();
for (auto & dest : dests)
{
// we create lmul - 1 instructions, because the original instruction
// will also be executed, so we start creating UOPs at vector
// registers + 1 until LMUL
MavisType* mavis_facade_ = getMavis(getContainer());
const std::string mnemonic = inst->getMnemonic();
auto srcs = inst->getSourceOpInfoList();
for (auto & src : srcs)
{
src.field_value += j;
}
auto dests = inst->getDestOpInfoList();
for (auto & dest : dests)
{
dest.field_value += j;
}
const auto imm = inst->getImmediate();
mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests,
imm);
InstPtr new_inst =
mavis_facade_->makeInstDirectly(ex_info, getClock());
InstPtr inst_uop_ptr(new Inst(*new_inst));
inst_uop_ptr->setVCSRs(VCSRs_);
inst_uop_ptr->setUOpID(j);
inst->appendUOp(inst_uop_ptr);
dest.field_value += j;
}
const auto imm = inst->getImmediate();
mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests,
imm);
InstPtr new_inst =
mavis_facade_->makeInstDirectly(ex_info, getClock());
// setting UOp instructions to have the same UID and PID as parent instruction
new_inst->setUniqueID(inst->getUniqueID());
new_inst->setProgramID(inst->getProgramID());
InstPtr inst_uop_ptr(new Inst(*new_inst));
inst_uop_ptr->setVCSRs(VCSRs_);
inst_uop_ptr->setUOpID(j);
inst->appendUOp(inst_uop_ptr);
if(i < num_decode){
insts->emplace_back(inst_uop_ptr);
inst_uop_ptr->setStatus(Inst::Status::DECODED);
}
i += VCSRs_.lmul; // increment decode number based on lmul
}
else
{
// can't decode this instruction, not enough decode credits
// to support cracking into multiple uops
break;
else{
ILOG("Not enough decode credits to process UOp, appending to uop_queue_ " << inst_uop_ptr);
uop_queue_.push(inst_uop_ptr);
}
}
}
insts->emplace_back(inst);
inst->setStatus(Inst::Status::DECODED);
else{
insts->emplace_back(inst);
inst->setStatus(Inst::Status::DECODED);

if (fusion_enable_)
{
uids.push_back(inst->getMavisUid());
}
if (fusion_enable_)
{
uids.push_back(inst->getMavisUid());
}

ILOG("Decoded: " << inst);
ILOG("Decoded: " << inst);

fetch_queue_.pop();
}
else
{
ILOG("Stalling due to waiting on vset: " << inst);
break;
fetch_queue_.pop();
}
}
}
if (fusion_enable_)
Expand Down
5 changes: 4 additions & 1 deletion core/Decode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ namespace olympia
//! \brief depth of the input instruction buffer
PARAMETER(uint32_t, fetch_queue_size, 10, "Size of the fetch queue")

//! \brief depth of UOp Queue
PARAMETER(uint32_t, uop_queue_size, 8, "Size of the UOp queue")

//! \brief enable fusion operations
//!
//! master enable, when false fusion_* parameters have no effect
Expand Down Expand Up @@ -144,6 +147,7 @@ namespace olympia
private:
// The internal instruction queue
InstQueue fetch_queue_;
InstQueue uop_queue_;

// Port listening to the fetch queue appends - Note the 1 cycle delay
sparta::DataInPort<InstGroupPtr> fetch_queue_write_in_{&unit_port_set_,
Expand Down Expand Up @@ -333,7 +337,6 @@ namespace olympia
void handleFlush_(const FlushManager::FlushingCriteria & criteria);

uint32_t uop_queue_credits_ = 0;
bool waiting_on_vset_ = false;
};

//! \brief the fusion functor/function objects
Expand Down
5 changes: 3 additions & 2 deletions core/ExecutePipe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ namespace olympia
cpu_node = getContainer()->getRoot();
}
for (uint32_t rf = 0; rf < core_types::RegFile::N_REGFILES;
++rf) // for (const auto rf : core_types::reg_files)
++rf)
{
// alu0, alu1 name is based on exe names, point to issue_queue name instead
scoreboard_views_[rf].reset(
Expand Down Expand Up @@ -77,7 +77,7 @@ namespace olympia
{
if (num_passes_needed_ == 0)
{
uint32_t num_passes = std::ceil((ex_inst->getVL()/ex_inst->getSEW()) / valu_adder_num_);
const uint32_t num_passes = std::ceil((ex_inst->getVL()/ex_inst->getSEW()) / valu_adder_num_);
if (num_passes > 1)
{
// only care about cases with multiple passes
Expand Down Expand Up @@ -120,6 +120,7 @@ namespace olympia
{
if (num_passes_needed_ != 0)
{
// resetting counters once vector instruction needing more than 1 pass
curr_num_pass_ = 0;
num_passes_needed_ = 0;
}
Expand Down
66 changes: 54 additions & 12 deletions core/Fetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ namespace olympia
sparta::Unit(node),
num_insts_to_fetch_(p->num_to_fetch),
skip_nonuser_mode_(p->skip_nonuser_mode),
my_clk_(getClock())
my_clk_(getClock()),
fetched_queue_("FetchedQueue", p->fetched_queue_size, node->getClock(), &unit_stat_set_)
{
in_fetch_queue_credits_.
registerConsumerHandler(CREATE_SPARTA_HANDLER_WITH_DATA(Fetch, receiveFetchQueueCredits_, uint32_t));
Expand All @@ -32,6 +33,8 @@ namespace olympia

fetch_inst_event_.reset(new sparta::SingleCycleUniqueEvent<>(&unit_event_set_, "fetch_random",
CREATE_SPARTA_HANDLER(Fetch, fetchInstruction_)));
in_vset_inst_.registerConsumerHandler(
CREATE_SPARTA_HANDLER_WITH_DATA(Fetch, process_vset_, InstPtr));
// Schedule a single event to start reading from a trace file
sparta::StartupEvent(node, CREATE_SPARTA_HANDLER(Fetch, initialize_));

Expand All @@ -51,7 +54,7 @@ namespace olympia

fetch_inst_event_->schedule(1);
}

void Fetch::fetchInstruction_()
{
const uint32_t upper = std::min(credits_inst_queue_, num_insts_to_fetch_);
Expand All @@ -62,16 +65,48 @@ namespace olympia
InstGroupPtr insts_to_send = sparta::allocate_sparta_shared_pointer<InstGroup>(instgroup_allocator);
for(uint32_t i = 0; i < upper; ++i)
{
InstPtr ex_inst = inst_generator_->getNextInst(my_clk_);
if(SPARTA_EXPECT_TRUE(nullptr != ex_inst))
{
ex_inst->setSpeculative(speculative_path_);
insts_to_send->emplace_back(ex_inst);

ILOG("Sending: " << ex_inst << " down the pipe");
}
else {
break;
if(!waiting_on_vset_ || fetched_queue_.size() < fetched_queue_.capacity()){
// Note -> should we change this to block after the first vector instruction after
// a vset is detected
InstPtr ex_inst = nullptr;
if(fetched_queue_.size() > 0){
// if we have already fetched instructions, we should process those first
ex_inst = fetched_queue_.read(0);
fetched_queue_.pop();
}
else{
ex_inst = inst_generator_->getNextInst(my_clk_);
}
if(SPARTA_EXPECT_TRUE(nullptr != ex_inst)){
if ((!waiting_on_vset_) || (waiting_on_vset_ && !ex_inst->isVector()))
{
// we only need to stall for vset when it's
// vsetvl or a vset{i}vl{i} that has a vl that is not the default
// any imms can be decoded here and we don't have to stall vset
// check if indirect vset
// move stalling check to fetch, fetch has to break it up, once one direct vset is detected
if(ex_inst->isVset()){
if(ex_inst->getSourceOpInfoList()[0].field_value != 0 || ex_inst->getOpCodeInfo()->getInstructionUniqueID() == 315){
// vl is being set by register, need to block
// vsetvl in mavis -> give it a mavis id, if mavisid == vsetvl number
waiting_on_vset_ = true;
}
}
ex_inst->setSpeculative(speculative_path_);
insts_to_send->emplace_back(ex_inst);

ILOG("Sending: " << ex_inst << " down the pipe");
}
else{
// store fetched instruction in queue
fetched_queue_.push((ex_inst));
break;
}
}
else{
ILOG("Stalling due to waiting on vset");
break;
}
}
}

Expand All @@ -95,6 +130,13 @@ namespace olympia
}
}

// Handler for the in_vset_inst_ port; the Fetch constructor registers it as
// that port's consumer handler. Clears the vset stall flag that
// fetchInstruction_() checks before sending instructions, then reschedules
// the fetch event.
//
// inst: the instruction delivered on the port. It is not read in this body;
//       only its arrival is used as the signal to resume.
void Fetch::process_vset_(const InstPtr & inst)
{
// lift the stall that fetchInstruction_() raises when it sends a blocking vset
waiting_on_vset_ = false;
ILOG("Recieved VSET from ExecutePipe, resuming fetching");
// schedule fetch, because we've been stalled on vset
// NOTE(review): a delay of Cycle(0) presumably means "this same cycle" — confirm against Sparta
fetch_inst_event_->schedule(sparta::Clock::Cycle(0));
}
// Called when decode has room
void Fetch::receiveFetchQueueCredits_(const uint32_t & dat) {
credits_inst_queue_ += dat;
Expand Down
Loading

0 comments on commit daaa41f

Please sign in to comment.