Skip to content

Commit

Permalink
PPU LLVM: Recycle identical functions
Browse files Browse the repository at this point in the history
  • Loading branch information
elad335 committed Mar 12, 2024
1 parent 1643fad commit b1c7f88
Show file tree
Hide file tree
Showing 5 changed files with 214 additions and 10 deletions.
134 changes: 134 additions & 0 deletions rpcs3/Emu/Cell/PPUAnalyser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2046,6 +2046,140 @@ bool ppu_module::analyse(u32 lib_toc, u32 entry, const u32 sec_end, const std::b
}

ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size());

// Duplicate detection, pass 1: group functions by their contents.
// Key:   a non-owning view over the func.size bytes that get_ptr<char>(func.addr) points at.
// Value: {occurrence count, candidate source address}. A source address of 0 marks a
//        group that was rejected by the branch check below.
std::unordered_map<std::string_view, std::pair<u32, u32>> duplicate_data_map;
// Start from a clean result map; it is filled by the second loop over funcs further down
duplicate_map.clear();

for (auto& func : funcs)
{
// Ignore empty functions and functions larger than 10000 bytes
if (func.size == 0 || func.size > 10000u)
{
continue;
}

// operator[] default-constructs {0, 0} the first time a byte sequence is seen
auto& data = duplicate_data_map[std::string_view{get_ptr<char>(func.addr), func.size}];

const usz count = data.first;

if (!count)
{
// First occurrence: remember it as the candidate source, nothing to compare against yet
data.first++;
data.second = func.addr;
continue;
}

if (!data.second)
{
// This byte sequence was rejected earlier (see the `fail` handling below)
continue;
}

if (count == 1)
{
// Second occurrence: validate the code once before accepting the group.
// Later occurrences share the same key and are not re-validated.
const u32 faddr = func.addr;
const u32 fend = func.addr + func.size;

bool fail = false;

//for (const auto [addr, size] : func.blocks)
const u32 addr = func.addr;
const u32 size = func.size;
{
// NOTE(review): this scope is a plain block, not a loop (the loop above is commented out),
// so `continue` here binds to the outer loop over funcs. func.size == 0 was already
// filtered at the top of that loop, so this check looks unreachable - confirm.
if (size == 0)
{
continue;
}

auto i_ptr = ensure(get_ptr<u32>(addr));

// Walk the function one 4-byte instruction at a time
for (u32 i = addr; i < addr + size; i += 4, i_ptr++)
{
const ppu_opcode_t op{*i_ptr};
const auto itype = s_ppu_itype.decode(op.opcode);

if (itype != ppu_itype::BC && itype != ppu_itype::B)
{
// Not a B/BC instruction: only the last instruction of the function matters here
if (i == fend - 4)
{
if (!(itype & ppu_itype::branch) && itype != ppu_itype::SC)
{
// Inserts a branch to following code
// (the last instruction is neither a branch-type instruction nor SC)
fail = true;
break;
}
}

continue;
}

// Branch target: taken as-is when op.aa is set, otherwise relative to this instruction's address
const u32 target = (op.aa ? 0 : i) + (itype == ppu_itype::B ? +op.bt24 : +op.bt14);

// Reject any B/BC whose target lies outside [faddr, fend)
if (target >= fend || target < faddr)
{
fail = true;
break;
}

// (bo & 0x14) == 0x14 is presumably the "branch always" BO encoding - TODO confirm against the ISA
if (itype == ppu_itype::BC && (op.bo & 0x14) != 0x14)
{
if (i == fend - 4)
{
// Can branch to next
fail = true;
break;
}
}
}
}

if (fail)
{
// Mark the group as rejected: count stays at 1 and source becomes 0,
// so every later occurrence takes the `!data.second` early-out above
data.first = 1;
data.second = 0;
continue;
}
}

data.first++;

// Choose the lowest function as the source
data.second = std::min<u32>(data.second, func.addr);
}

// Duplicate detection, pass 2: publish the accepted groups into duplicate_map.
// dups_count counts functions that map to a source other than themselves.
usz dups_count = 0;

for (auto& func : funcs)
{
// Same filter as the first pass, so every function looked up below was inserted there
if (func.size == 0 || func.size > 10000u)
{
continue;
}

// Look up the {count, source} pair recorded for this function's bytes
// (::at32 is presumably a checked lookup - confirm against its definition)
const auto data = ::at32(duplicate_data_map, std::string_view{get_ptr<char>(func.addr), func.size});

// Only groups with more than one accepted occurrence are published
if (data.first > 1)
{
// Every member of the group is mapped to the source, including the source itself
// (for which func.addr == data.second)
duplicate_map[func.addr] = data.second;

for (const auto [addr, size] : func.blocks)
{
// Skip empty blocks and blocks starting at or past the end of the function.
// NOTE(review): a block starting below func.addr is not filtered and would make
// (addr - func.addr) wrap around - presumably cannot happen, confirm.
if (size == 0 || addr >= func.addr + func.size)
{
continue;
}

// Map the block to the same offset inside the source function
duplicate_map[addr] = data.second + (addr - func.addr);
}

if (func.addr != data.second)
{
dups_count++;
}

ppu_log.trace("Found PPU function duplicate: func 0x%x vs 0x%x (%d times) (size=%d)", func.addr, data.second, data.first, func.size);
}
}

// The denominator is the number of distinct byte sequences seen, not the number of functions.
// NOTE(review): if no function passed the size filter the map is empty and the
// percentage is computed as 0.0 / 0 - confirm this is acceptable for the log output.
ppu_log.success("Function duplication count: %d/%d (%g%)", dups_count, duplicate_data_map.size(), dups_count * 100.0 / duplicate_data_map.size());
return true;
}

Expand Down
2 changes: 2 additions & 0 deletions rpcs3/Emu/Cell/PPUAnalyser.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ struct ppu_module
std::vector<ppu_function> funcs{};
std::deque<std::shared_ptr<void>> allocations;
std::map<u32, u32> addr_to_seg_index;
std::unordered_map<u32, u32> duplicate_map;

// Copy info without functions
void copy_part(const ppu_module& info)
Expand All @@ -107,6 +108,7 @@ struct ppu_module
secs = info.secs;
allocations = info.allocations;
addr_to_seg_index = info.addr_to_seg_index;
duplicate_map = info.duplicate_map;
}

bool analyse(u32 lib_toc, u32 entry, u32 end, const std::basic_string<u32>& applied, std::function<bool()> check_aborted = {});
Expand Down
52 changes: 46 additions & 6 deletions rpcs3/Emu/Cell/PPUThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4146,7 +4146,7 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
}

ppu_log.notice("Failed to precompile '%s' (prx: %s, ovl: %s): Attempting tratment as executable file", path, prx_err, ovl_err);
possible_exec_file_paths.push(path, offset, file_size);
possible_exec_file_paths.push(file_queue[func_i]);
inc_fdone = 0;
}
});
Expand Down Expand Up @@ -4643,8 +4643,15 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
// Copy block or function entry
ppu_function& entry = part.funcs.emplace_back(func);

u32 og_func = entry.addr;

if (auto it = info.duplicate_map.find(entry.addr); it != info.duplicate_map.end())
{
og_func = it->second;
}

// Fixup some information
entry.name = fmt::format("__0x%x", entry.addr - reloc);
entry.name = fmt::format("__0x%x", og_func - reloc);

if (has_mfvscr && g_cfg.core.ppu_set_sat_bit)
{
Expand Down Expand Up @@ -4808,7 +4815,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
settings += ppu_settings::accurate_nj_mode, settings -= ppu_settings::fixup_nj_denormals, fmt::throw_exception("NJ Not implemented");

// Write version, hash, CPU, settings
fmt::append(obj_name, "v6-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
fmt::append(obj_name, "v7-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
}

if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
Expand Down Expand Up @@ -5037,6 +5044,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)

bool early_exit = false;

std::map<std::string, ppu_intrp_func_t> func_ptr_map;

// Get and install function addresses
for (const auto& func : info.funcs)
{
Expand All @@ -5054,12 +5063,29 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_size)
break;
}

const auto name = fmt::format("__0x%x", func.addr - reloc);
u32 og_func = func.addr;

if (auto it = info.duplicate_map.find(func.addr); it != info.duplicate_map.end())
{
og_func = it->second;
}

const auto name = fmt::format("__0x%x", og_func - reloc);

ppu_intrp_func_t dummy{};
ppu_intrp_func_t& func_ptr = is_first ? func_ptr_map[name] : dummy;

// Try to locate existing function if it is not the first time
const auto addr = is_first ? ensure(reinterpret_cast<ppu_intrp_func_t>(jit->get(name)))
: reinterpret_cast<ppu_intrp_func_t>(ensure(jit_mod.funcs[index]));
const auto addr = is_first ? (func_ptr ? func_ptr : (reinterpret_cast<ppu_intrp_func_t>(jit->get(name))))
: reinterpret_cast<ppu_intrp_func_t>(jit_mod.funcs[index]);

if (!addr)
{
ppu_log.fatal("Failed to retrieve symbol address at 0x%x (duplicate=0x%x)", func.addr, info.duplicate_map.contains(func.addr) ? og_func : 0);
ensure(addr);
}

func_ptr = addr;
jit_mod.funcs.emplace_back(addr);

if (func.size == 4 && !BLR_func && *info.get_ptr<u32>(func.addr) == ppu_instructions::BLR())
Expand Down Expand Up @@ -5148,6 +5174,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
{
if (func.size)
{
if (auto it = module_part.duplicate_map.find(func.addr); it != module_part.duplicate_map.end() && it->second != it->first)
{
continue;
}

const auto f = cast<Function>(_module->getOrInsertFunction(func.name, _func).getCallee());
f->setCallingConv(CallingConv::GHC);
f->addParamAttr(1, llvm::Attribute::NoAlias);
Expand Down Expand Up @@ -5194,6 +5225,15 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co

if (module_part.funcs[fi].size)
{
const u32 faddr = module_part.funcs[fi].addr;
auto it = module_part.duplicate_map.find(faddr);

if (it != module_part.duplicate_map.end() && it->second != faddr)
{
ppu_log.trace("LLVM: Function 0x%x was skipped (duplicate)", faddr);
continue;
}

// Translate
if (const auto func = translator.Translate(module_part.funcs[fi]))
{
Expand Down
33 changes: 29 additions & 4 deletions rpcs3/Emu/Cell/PPUTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
const u64 base = m_reloc ? m_reloc->addr : 0;
m_addr = info.addr - base;
m_attr = info.attr;
m_func_base = m_addr;

// Don't emit check in small blocks without terminator
bool need_check = info.size >= 16;
Expand Down Expand Up @@ -304,13 +305,29 @@ Value* PPUTranslator::VecHandleResult(Value* val)

// Builds the IR value for the address of the instruction currently being translated (m_addr) plus _add.
// Three cases after this commit:
//  - the current function's base is a key in m_info.duplicate_map: the loaded CIA value plus
//    the instruction's offset from the function start (m_addr - m_func_base) plus _add
//  - otherwise, with m_reloc set: m_seg0 plus the instruction address plus _add
//  - otherwise: a 64-bit constant
// NOTE: this listing is a rendered diff without +/- markers. Per the hunk header (-304,13 +305,29)
// three of the lines below are pre-change lines replaced by the commit: the bare `if (m_reloc)`
// directly under the opening brace and the two returns that use `m_addr + _add`.
Value* PPUTranslator::GetAddr(u64 _add)
{
if (m_reloc)
// m_cia is cleared while the CIA register is loaded and restored right after.
// Presumably this forces a fresh load instead of reusing a cached value - TODO confirm against RegLoad.
const auto old_cia = std::exchange(m_cia, nullptr);

// NOTE(review): m_func_base is set from m_addr in Translate(), i.e. info.addr minus the relocation base,
// while the other lookups shown in this commit (CallFunction, ppu_initialize) query duplicate_map
// before subtracting the base. With a non-zero base these may be different address spaces - confirm.
const bool is_duplicate = m_info.duplicate_map.contains(m_func_base);
const auto cia_add = is_duplicate ? ZExt(RegLoad(m_cia)) : nullptr;
// Offset from the function start for duplicates, the full instruction address otherwise.
// Note the u64 operands are narrowed to u32 here.
const u32 inst_diff = is_duplicate ? m_addr - m_func_base : m_addr;

// Restore value
m_cia = old_cia;

// NOTE(review): `addr` is not referenced again in this function as shown
Value* addr = nullptr;

if (is_duplicate)
{
// Add to current CIA
return m_ir->CreateAdd(m_ir->getInt64(inst_diff + _add), cia_add);
}
else if (m_reloc)
{
// Load segment address from global variable, compute actual instruction address
return m_ir->CreateAdd(m_ir->getInt64(m_addr + _add), m_seg0);
return m_ir->CreateAdd(m_ir->getInt64(inst_diff + _add), m_seg0);
}

return m_ir->getInt64(m_addr + _add);
return m_ir->getInt64(inst_diff + _add);
}

Type* PPUTranslator::ScaleType(Type* type, s32 pow2)
Expand Down Expand Up @@ -419,7 +436,15 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)

if (!indirect)
{
callee = m_module->getOrInsertFunction(fmt::format("__0x%x", target_last - base), type);
const auto it = m_info.duplicate_map.find(target_last);
const u32 first_func = it == m_info.duplicate_map.end() ? target_last : it->second;

if (base)
{
ensure(first_func >= base && target_last >= base);
}

callee = m_module->getOrInsertFunction(fmt::format("__0x%x", first_func - base), type);
cast<Function>(callee.getCallee())->setCallingConv(CallingConv::GHC);
}
}
Expand Down
3 changes: 3 additions & 0 deletions rpcs3/Emu/Cell/PPUTranslator.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ class PPUTranslator final : public cpu_translator
// Current position-independent address
u64 m_addr = 0;

// Function start
u64 m_func_base = 0;

// Function attributes
bs_t<ppu_attr> m_attr{};

Expand Down

0 comments on commit b1c7f88

Please sign in to comment.