Skip to content

Commit

Permalink
Merge branch 'libretro:master' into master
Browse files (browse the repository at this point in the history)
  • Loading branch information
ethanaobrien authored Jul 23, 2024
2 parents 94b048d + d6f625a commit 7b2102a
Show file tree
Hide file tree
Showing 21 changed files with 103 additions and 86 deletions.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ ifeq "$(ASAN)" "1"
else
ifeq "$(DEBUG)" "0"
CFLAGS += -O3 -DNDEBUG
else
CFLAGS += -O1
endif
endif
LD = $(CC)
Expand Down
2 changes: 1 addition & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ fi
#fi

if [ -z "$ARCH" ]; then
ARCH=`$CC -dumpmachine | awk -F '-' '{print $1}'`
ARCH=`$CC $MFLAGS $CFLAGS -dumpmachine | awk -F '-' '{print $1}'`
fi

# CPU/ABI stuff first, else compile test may fail
Expand Down
17 changes: 9 additions & 8 deletions cpu/drc/emit_x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -934,11 +934,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common

#define emith_call_reg(r) do { \
EMIT_REX_IF(0, 0, r); \
EMIT_OP_MODRM(0xff, 3, 2, (r)&7); \
EMIT_OP_MODRM64(0xff, 3, 2, r); \
} while (0)

#define emith_abicall_ctx(offs) do { \
EMIT_OP_MODRM(0xff, 2, 2, CONTEXT_REG); \
EMIT_REX_IF(0, 0, CONTEXT_REG); \
EMIT_OP_MODRM64(0xff, 2, 2, CONTEXT_REG); \
EMIT(offs, u32); \
} while (0)

Expand All @@ -955,11 +956,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common

#define emith_jump_reg(r) do { \
EMIT_REX_IF(0, 0, r); \
EMIT_OP_MODRM(0xff, 3, 4, (r)&7); \
EMIT_OP_MODRM64(0xff, 3, 4, r); \
} while (0)

#define emith_jump_ctx(offs) do { \
EMIT_OP_MODRM(0xff, 2, 4, CONTEXT_REG); \
EMIT_REX_IF(0, 0, CONTEXT_REG); \
EMIT_OP_MODRM64(0xff, 2, 4, CONTEXT_REG); \
EMIT(offs, u32); \
} while (0)

Expand Down Expand Up @@ -1209,13 +1211,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common
} while (0)

#define emith_sh2_rcall(a, tab, func, mask) do { \
int scale_ = PTR_SCALE <= 2 ? PTR_SCALE : 2; \
emith_lsr(mask, a, SH2_READ_SHIFT); \
if (PTR_SCALE > scale_) emith_lsl(mask, mask, PTR_SCALE-scale_); \
EMIT_XREX_IF(1, tab, tab, mask); \
EMIT_OP_MODRM64(0x8d, 0, tab, 4); \
EMIT_SIB64(PTR_SCALE, mask, tab); /* lea tab, [tab + mask * {4,8}] */ \
EMIT_XREX_IF(1, tab, tab, mask); \
EMIT_OP_MODRM64(0x8d, 0, tab, 4); \
EMIT_SIB64(PTR_SCALE, mask, tab); /* lea tab, [tab + mask * {4,8}] */ \
EMIT_SIB64(scale_+1, mask, tab); /* lea tab, [tab + mask*(2*scale)] */ \
EMIT_REX_IF(1, func, tab); \
emith_deref_modrm(0x8b, 0, func, tab); /* mov func, [tab] */ \
EMIT_REX_IF(0, mask, tab); \
Expand Down
8 changes: 4 additions & 4 deletions cpu/sh2/compiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ static char sh2dasm_buff[64];
(ulong)(sh2)->r[8], (ulong)(sh2)->r[9], (ulong)(sh2)->r[10], (ulong)(sh2)->r[11], \
(ulong)(sh2)->r[12], (ulong)(sh2)->r[13], (ulong)(sh2)->r[14], (ulong)(sh2)->r[15]); \
printf("%csh2 pc-ml %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", ms, \
(ulong)(sh2)->pc, (ulong)(sh2)->ppc, (ulong)(sh2)->pr, (ulong)(sh2)->sr&0xfff, \
(ulong)(sh2)->pc, (ulong)(sh2)->ppc, (ulong)(sh2)->pr, (ulong)(sh2)->sr, \
(ulong)(sh2)->gbr, (ulong)(sh2)->vbr, (ulong)(sh2)->mach, (ulong)(sh2)->macl); \
printf("%csh2 tmp-p %08x %08x %08x %08x %08x %08lx %08x %08x\n", ms, \
(sh2)->drc_tmp, (sh2)->irq_cycles, \
Expand Down Expand Up @@ -269,12 +269,14 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
SH2_DUMP(&fsh2, "file");
SH2_DUMP(sh2, "current");
SH2_DUMP(&csh2[idx][0], "previous");
SH2_DUMP(&csh2[idx][1], "previous");
char *ps = (char *)sh2, *pf = (char *)&fsh2;
for (idx = 0; idx < offsetof(SH2, read8_map); idx += sizeof(u32))
if (*(u32 *)(ps+idx) != *(u32 *)(pf+idx))
printf("diff reg %ld\n",(long)idx/sizeof(u32));
exit(1);
}
memcpy(&csh2[idx][1], &csh2[idx][0], offsetof(SH2, poll_cnt)+4);
csh2[idx][0] = fsh2;
}
}
Expand Down Expand Up @@ -4649,6 +4651,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id)
FLUSH_CYCLES(sr);
rcache_get_reg_arg(0, GET_Rn(), NULL);
tmp = emit_memhandler_read(0);
sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL);
emith_clr_t_cond(sr);
emith_cmp_r_imm(tmp, 0);
emith_set_t_cond(sr, DCOND_EQ);
Expand Down Expand Up @@ -5262,8 +5265,6 @@ static void sh2_generate_utils(void)
host_arg2reg(arg1, 1);
host_arg2reg(arg2, 2);
host_arg2reg(arg3, 3);
emith_move_r_r(arg0, arg0); // nop
emith_flush();

// sh2_drc_write8(u32 a, u32 d)
sh2_drc_write8 = (void *)tcache_ptr;
Expand Down Expand Up @@ -5645,7 +5646,6 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free)
u32 start_lit, end_lit;
struct block_desc *block;
int removed = 0, rest;
u32 _a = a;

// ignore cache-through
a &= wtmask;
Expand Down
7 changes: 4 additions & 3 deletions cpu/sh2/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc,
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
#define DRC_SR_REG "r28"
#define DRC_REG_LL 0 // no ABI for __ILP32__
#elif defined(__i386__)
#define DRC_SR_REG "edi"
#define DRC_REG_LL 0 // 32 bit
//i386 only has 8 registers and reserving one of them causes too much spilling
//#elif defined(__i386__)
//#define DRC_SR_REG "edi"
//#define DRC_REG_LL 0 // 32 bit
#elif defined(__x86_64__)
#define DRC_SR_REG "rbx"
#define DRC_REG_LL (__ILP32__ || _WIN32)
Expand Down
5 changes: 2 additions & 3 deletions pico/draw.c
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ static void DrawLayer(int plane_sh, u32 *hcache, int cellskip, int maxcells,
ts.nametab+=(ts.line>>4)<<shift[width];

DrawStripInterlace(&ts, plane_sh);
} else if( pvid->reg[11]&4) {
} else if (pvid->reg[11]&4) {
// shit, we have 2-cell column based vscroll
// luckily this doesn't happen too often
ts.line=ymask|(shift[width]<<24); // save some stuff instead of line
Expand Down Expand Up @@ -2125,7 +2125,6 @@ void PicoDrawSync(int to, int off, int on)
{
int width2 = (est->Pico->video.reg[12]&1) ? 160 : 128;

// technically, VDP starts active display output at slot 12
if (unlikely(on|off) && (off >= width2 ||
// hack for timing inaccuracy, if on/off near borders
(off && off <= 24) || (on < width2 && on >= width2-24)))
Expand Down Expand Up @@ -2178,7 +2177,7 @@ void PicoDrawBgcDMA(u16 *base, u32 source, u32 mask, int dlen, int sl)
BgcDMAoffs = 0;

// handle slot offset in 1st line
if (sl-12 > 0) // active display output only starts at slot 12
if (sl-12 > 0)
BgcDMAoffs = 2*(sl-12);
else if (sl < 0) { // DMA starts before active display
BgcDMAsrc += 2*-sl;
Expand Down
19 changes: 6 additions & 13 deletions pico/draw_arm.S
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,6 @@ DrawLayer:
subs r10,r10, #0x01000000
bmi .dsloop_exit

.dsloop_enter:
and r7, r5, r8
add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords)
ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend)
Expand Down Expand Up @@ -1385,6 +1384,9 @@ DrawWindow:
and r10, r10, #7
mov r10, r10, lsl #1 @ r10=ty

ldr r6, [r11, #OFS_EST_rendstatus]
ldr lr, [r11, #OFS_EST_PicoMem_vram]

mov r12, r12, lsl #10

tst r4, #1 @ 40 cell mode?
Expand All @@ -1394,12 +1396,6 @@ DrawWindow:
addeq r12, r12, r5, lsl #6 @ nametab
add r12, r12, r0, lsl #2 @ +starttile

ldr lr, [r11, #OFS_EST_PicoMem_vram]
ldr r6, [r11, #OFS_EST_rendstatus]

@ fetch the first code now
ldrh r7, [lr, r12]

ands r6, r6, #PDRAW_WND_DIFF_PRIO
cmpeq r2, #1 @ prio && !(rendstatus & WND_DIFF_PRIO)?
ldmeqfd sp!, {r4-r11,pc} @ yes, assume that whole window uses same priority
Expand All @@ -1412,20 +1408,18 @@ DrawWindow:
@ cache some stuff to avoid mem access
ldr r11, [r11, #OFS_EST_HighCol]
mov r8, r8, lsl #1 @ cells
add r11,r11,#8
mvn r9, #0 @ r9=prevcode=-1
add r1, r11, r0, lsl #4 @ r1=pdest
add r1, r11, r0, lsl #4 @ r1=pdest=HighCol+starttile (+8 added in loop)
mov r0, #0xf
b .dwloop_enter

@ r4,r5 are scratch in this loop
.dwloop:
add r1, r1, #8
.dwloop_nor1:
add r12, r12, #2 @ halfwords
ldrh r7, [lr, r12] @ r7=code (int, but from unsigned, no sign extend)
add r12, r12, #2 @ halfwords
subs r8, r8, #1
beq .dwloop_end @ done
bmi .dwloop_end @ done

eor r5, r6, r7, lsr #15
tst r5, #1
Expand All @@ -1435,7 +1429,6 @@ DrawWindow:
cmp r7, r9
beq .dw_samecode @ we know stuff about this tile already

.dwloop_enter:
mov r9, r7 @ remember code

movs r2, r9, lsl #20 @ if (code&0x1000)
Expand Down
2 changes: 1 addition & 1 deletion pico/media.c
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ enum media_type_e PicoLoadMedia(const char *filename,
// simple test for GG. Do this here since m.hardware is nulled in Insert
if ((PicoIn.AHW & PAHW_SMS) && !PicoIn.hwSelect) {
const char *ext = NULL;
if (rom_file && rom_file->ext && (*rom_file->ext != '\0')) {
if (rom_file && (*rom_file->ext != '\0')) {
ext = rom_file->ext;
}
else if ((ext = strrchr(filename, '.'))) {
Expand Down
21 changes: 9 additions & 12 deletions pico/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,6 @@ void NOINLINE ctl_write_z80reset(u32 d)
pprof_end_sub(m68k);
}
Pico.t.z80_busdelay &= 0xff; // also resets bus request
Pico.video.status &= ~PVS_Z80WAIT;
YM2612ResetChip();
timers_reset();
}
Expand Down Expand Up @@ -1174,7 +1173,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80)

// the busy flag in the YM2612 status is actually a 32 cycle timer
// (89.6 Z80 cycles), triggered by any write to the data port.
Pico.t.ym2612_busy = (cycles + 90) << 8; // Q8 for convenience
Pico.t.ym2612_busy = (cycles << 8) + YMBUSY_ZCYCLES; // Q8 for convenience

switch (addr)
{
Expand Down Expand Up @@ -1284,9 +1283,11 @@ static u32 ym2612_read_local_68k(void)
void ym2612_pack_state(void)
{
// timers are saved as tick counts, in 16.16 int format
int tac, tat = 0, tbc, tbt = 0;
int tac, tat = 0, tbc, tbt = 0, busy = 0;
tac = 1024 - ym2612.OPN.ST.TA;
tbc = 256 - ym2612.OPN.ST.TB;
if (Pico.t.ym2612_busy > 0)
busy = cycles_z80_to_68k(Pico.t.ym2612_busy);
if (Pico.t.timer_a_next_oflow != TIMER_NO_OFLOW)
tat = (int)((double)(Pico.t.timer_a_step - Pico.t.timer_a_next_oflow)
/ (double)Pico.t.timer_a_step * tac * 65536);
Expand All @@ -1301,12 +1302,12 @@ void ym2612_pack_state(void)
YM2612PicoStateSave2_940(tat, tbt);
else
#endif
YM2612PicoStateSave2(tat, tbt);
YM2612PicoStateSave2(tat, tbt, busy);
}

void ym2612_unpack_state(void)
{
int i, ret, tac, tat, tbc, tbt;
int i, ret, tac, tat, tbc, tbt, busy = 0;
YM2612PicoStateLoad();

// feed all the registers and update internal state
Expand Down Expand Up @@ -1336,12 +1337,13 @@ void ym2612_unpack_state(void)
ret = YM2612PicoStateLoad2_940(&tat, &tbt);
else
#endif
ret = YM2612PicoStateLoad2(&tat, &tbt);
ret = YM2612PicoStateLoad2(&tat, &tbt, &busy);
if (ret != 0) {
elprintf(EL_STATUS, "old ym2612 state");
return; // no saved timers
}

Pico.t.ym2612_busy = cycles_68k_to_z80(busy);
tac = (1024 - ym2612.OPN.ST.TA) << 16;
tbc = (256 - ym2612.OPN.ST.TB) << 16;
if (ym2612.OPN.ST.mode & 1)
Expand Down Expand Up @@ -1370,11 +1372,6 @@ void PicoWrite16_32x(u32 a, u32 d) {}
static void access_68k_bus(int delay) // bus delay as Q8
{
// TODO: if the 68K is in DMA wait, Z80 has to wait until DMA ends
if (Pico.video.status & (PVS_CPUWR|PVS_CPURD)) {
z80_subCLeft(z80_cyclesLeft); // rather rough on both condition and action
// TODO the next line will cause audio lag in Overdrive 2 demo?
//Pico.video.status |= PVS_Z80WAIT;
}

// 68k bus access delay for z80. The fractional part needs to be accumulated
// until an additional cycle is full. That is then added to the integer part.
Expand All @@ -1383,7 +1380,7 @@ static void access_68k_bus(int delay) // bus delay as Q8
Pico.t.z80_busdelay &= 0xff; // leftover cycle fraction
// don't use SekCyclesBurn() here since the Z80 doesn't run in cycle lock to
// the 68K. Count the stolen cycles to be accounted later in the 68k CPU runs
Pico.t.z80_buscycles += 8;
Pico.t.z80_buscycles += 8; // TODO <=8.4 for Rick 2, but >=8.9 for misc_test
}

static unsigned char z80_md_vdp_read(unsigned short a)
Expand Down
1 change: 1 addition & 0 deletions pico/pico.c
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ void PicoLoopPrepare(void)
Pico.t.vcnt_wrap = 0xEB;
Pico.t.vcnt_adj = 6;
}
PicoVideoFIFOMode(Pico.video.reg[1]&0x40, Pico.video.reg[12]&1);

Pico.m.dirtyPal = 1;
rendstatus_old = -1;
Expand Down
6 changes: 0 additions & 6 deletions pico/pico_cmn.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,6 @@ static void do_timing_hacks_start(struct PicoVideo *pv)
int cycles = PicoVideoFIFOHint();

SekCyclesBurn(cycles); // prolong cpu HOLD if necessary
if (pv->status & PVS_Z80WAIT) {
Pico.t.z80c_cnt += cycles_68k_to_z80(cycles);
if (!(pv->status & (PVS_CPUWR|PVS_CPURD)))
pv->status &= ~PVS_Z80WAIT;
}

// XXX how to handle Z80 bus cycle stealing during DMA correctly?
if ((Pico.t.z80_buscycles -= cycles) < 0)
Pico.t.z80_buscycles = 0;
Expand Down
12 changes: 7 additions & 5 deletions pico/pico_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ extern struct DrZ80 drZ80;

// 68k clock = OSC/7, z80 clock = OSC/15, 68k:z80 ratio = 7/15 = 3822.9/8192
#define cycles_68k_to_z80(x) ((x) * 3823 >> 13)
#define cycles_z80_to_68k(x) ((x) * 8777 >> 12)

// ----------------------- SH2 CPU -----------------------

Expand Down Expand Up @@ -299,7 +300,6 @@ extern SH2 sh2s[2];
#define PVS_DMAFILL (1 << 20) // DMA fill is waiting for fill data
#define PVS_DMABG (1 << 21) // background DMA operation is running
#define PVS_FIFORUN (1 << 22) // FIFO is processing
#define PVS_Z80WAIT (1 << 23) // Z80 blocked by VDP DMA

struct PicoVideo
{
Expand Down Expand Up @@ -898,10 +898,12 @@ void ym2612_unpack_state(void);

#define TIMER_NO_OFLOW 0x70000000

// tA = 72 * (1024 - TA) / M, with M = mclock/2
#define TIMER_A_TICK_ZCYCLES cycles_68k_to_z80(256LL* 72*2) // Q8
// tB = 16*72 * ( 256 - TB) / M
#define TIMER_B_TICK_ZCYCLES cycles_68k_to_z80(256LL*16*72*2) // Q8
// tA = 24*3 * (1024 - TA) / M, with M = mclock/2
#define TIMER_A_TICK_ZCYCLES cycles_68k_to_z80(256LL* 24*3*2) // Q8
// tB = 16*24*3 * ( 256 - TB) / M
#define TIMER_B_TICK_ZCYCLES cycles_68k_to_z80(256LL*16*24*3*2) // Q8
// busy = 32*3 / M
#define YMBUSY_ZCYCLES cycles_68k_to_z80(256LL* 32*3*2) // Q8

#define timers_cycle(ticks) \
if (Pico.t.ym2612_busy > 0) \
Expand Down
5 changes: 4 additions & 1 deletion pico/sek.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ PICO_INTERNAL void SekPackCpu(unsigned char *cpu, int is_sub)
*(u32 *)(cpu+0x50) = SekCycleCntS68k;
*(s16 *)(cpu+0x4e) = SekCycleCntS68k - SekCycleAimS68k;
} else {
*(u32 *)(cpu+0x50) = Pico.t.m68c_cnt;
*(u32 *)(cpu+0x50) = Pico.t.m68c_cnt + Pico.t.z80_buscycles +
((Pico.t.refresh_delay + (1<<14)/2) >> 14);
*(s16 *)(cpu+0x4e) = Pico.t.m68c_cnt - Pico.t.m68c_aim;
}
}
Expand Down Expand Up @@ -266,6 +267,8 @@ PICO_INTERNAL void SekUnpackCpu(const unsigned char *cpu, int is_sub)
} else {
Pico.t.m68c_cnt = *(u32 *)(cpu+0x50);
Pico.t.m68c_aim = Pico.t.m68c_cnt - *(s16 *)(cpu+0x4e);
Pico.t.z80_buscycles = 0;
Pico.t.refresh_delay = 0;
}
}

Expand Down
Loading

0 comments on commit 7b2102a

Please sign in to comment.