Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pbkit: Enable flip stall; expose ramin, print_char, depth buffer internals #628

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 72 additions & 51 deletions lib/pbkit/pbkit.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <hal/debug.h>
#include <stdbool.h>
#include <assert.h>
#include <winapi/synchapi.h>

#include "pbkit.h"
#include "outer.h"
Expand All @@ -42,15 +43,6 @@
#define ADDR_FBMEM 2
#define ADDR_AGPMEM 3

#define DMA_CLASS_2 2
#define DMA_CLASS_3 3
#define DMA_CLASS_3D 0x3D

#define GR_CLASS_30 0x30
#define GR_CLASS_39 0x39
#define GR_CLASS_62 0x62
#define GR_CLASS_97 0x97
#define GR_CLASS_9F 0x9F

#define GPU_IRQ 3

Expand All @@ -69,14 +61,6 @@
#define PB_SETNOISE 0xBAA
#define PB_FINISHED 0xFAB

//Describes one context object (DMA or graphics) that lives in PRAMIN instance memory.
struct s_CtxDma
{
//Channel identifier supplied by the caller when the object is created
DWORD ChannelID;
DWORD Inst; //Addr in PRAMIN area, unit=16 bytes blocks, baseaddr=VIDEO_BASE+NV_PRAMIN
//Object class number (see the DMA_CLASS_* and GR_CLASS_* constants)
DWORD Class;
//Set to 1 for graphics objects; NOTE(review): presumably 0 for DMA objects - confirm
DWORD isGr;
};

static unsigned int pb_ColorFmt = NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8;
static unsigned int pb_DepthFmt = NV097_SET_SURFACE_FORMAT_ZETA_Z24S8;

Expand Down Expand Up @@ -165,6 +149,7 @@ static DWORD pb_FBVFlag;
static DWORD pb_GPUFrameBuffersFormat;//encoded format for GPU
static DWORD pb_EXAddr[8]; //extra buffers addresses
static DWORD pb_ExtraBuffersCount=0;
static DWORD pb_FBSizeMultiplier = 1;

static DWORD pb_DepthStencilAddr;
static DWORD pb_DepthStencilPitch;
Expand Down Expand Up @@ -387,7 +372,7 @@ static void pb_scrollup(void)
memset(&pb_text_screen[ROWS-1][0],0,COLS);
}

static void pb_print_char(char c)
void pb_print_char(char c)
{
if (c=='\n')
{
Expand Down Expand Up @@ -690,7 +675,9 @@ static DWORD pb_gr_handler(void)

//calling XReboot() from here doesn't work well.

while(1) {};
while(1) {
Sleep(2000);
};
}
}
}
Expand Down Expand Up @@ -1344,11 +1331,11 @@ static void pb_prepare_tiles(void)



static void pb_create_dma_ctx( DWORD ChannelID,
void pb_create_dma_ctx(DWORD ChannelID,
DWORD Class,
DWORD Base,
DWORD Limit,
struct s_CtxDma *pDmaObject )
struct s_CtxDma *pDmaObject)
{
DWORD Addr;
DWORD AddrSpace;
Expand Down Expand Up @@ -1393,7 +1380,7 @@ static void pb_create_dma_ctx( DWORD ChannelID,



static void pb_bind_channel(struct s_CtxDma *pCtxDmaObject)
void pb_bind_channel(struct s_CtxDma *pCtxDmaObject)
{
DWORD entry;
DWORD *p;
Expand Down Expand Up @@ -1624,11 +1611,35 @@ static void pb_3D_init(void)
#endif
}

//Reserves a run of instance memory and returns the index of its first block.
//  size: requested size in bytes. Instance indices count 16-byte blocks, so
//        the size is converted to blocks and rounded UP; a truncating shift
//        would under-reserve for sizes that are not a multiple of 16 and let
//        the next object overlap the tail of this one.
//  returns: the value pb_FreeInst had before the reservation.
//No upper bound is checked here; the caller must know how much instance
//memory is available.
DWORD pb_reserve_instance(DWORD size)
{
    DWORD first = pb_FreeInst;
    //whole blocks, plus one more if a partial block remains (no overflow for large sizes)
    DWORD blocks = (size >> 4) + ((size & 0xF) ? 1 : 0);

    pb_FreeInst += blocks;
    return first;
}

//Initializes a graphics object at an already reserved PRAMIN instance and
//describes it in *pGrObject.
//  ChannelID, Class: copied into the descriptor unchanged.
//  instance: instance index (16-byte blocks from the start of PRAMIN).
//  flags, flags3D: values stored in the first and second DWORD of the object.
//  pGrObject: descriptor to fill; it is cleared first, then marked as a
//             graphics object (isGr=1).
void pb_create_gr_instance(int ChannelID,
                           int Class,
                           DWORD instance,
                           DWORD flags,
                           DWORD flags3D,
                           struct s_CtxDma *pGrObject)
{
    //byte offset of the object inside PRAMIN
    DWORD byteOffset = instance << 4;

    //object header: two caller-provided words followed by two zero words
    VIDEOREG(NV_PRAMIN + byteOffset + 0x00) = flags;
    VIDEOREG(NV_PRAMIN + byteOffset + 0x04) = flags3D;
    VIDEOREG(NV_PRAMIN + byteOffset + 0x08) = 0;
    VIDEOREG(NV_PRAMIN + byteOffset + 0x0C) = 0;

    //start from a zeroed descriptor, then record what was created
    memset(pGrObject, 0, sizeof *pGrObject);
    pGrObject->ChannelID = ChannelID;
    pGrObject->Inst = instance;
    pGrObject->Class = Class;
    pGrObject->isGr = 1;
}

static void pb_create_gr_ctx( int ChannelID,
void pb_create_gr_ctx( int ChannelID,
int Class,
struct s_CtxDma *pGrObject )
{
Expand Down Expand Up @@ -1660,34 +1671,22 @@ static void pb_create_gr_ctx( int ChannelID,
}
}

Inst=pb_FreeInst; pb_FreeInst+=(size>>4);
Inst = pb_reserve_instance(size);

if (flags3D)
{
pb_3DGrCtxInst[pb_FifoChannelID]=Inst;
pb_3D_init();
}


flags=Class&0x000000FF;
flags3D=0x00000000;

if (Class==GR_CLASS_39) flags|=0x01000000;

if (Class==GR_CLASS_97) flags3D=0x00000A00;

VIDEOREG(NV_PRAMIN+(Inst<<4)+0x00)=flags;
VIDEOREG(NV_PRAMIN+(Inst<<4)+0x04)=flags3D;
VIDEOREG(NV_PRAMIN+(Inst<<4)+0x08)=0;
VIDEOREG(NV_PRAMIN+(Inst<<4)+0x0C)=0;


memset(pGrObject,0,sizeof(struct s_CtxDma));

pGrObject->ChannelID=ChannelID;
pGrObject->Class=Class;
pGrObject->isGr=1;
pGrObject->Inst=Inst;
pb_create_gr_instance(ChannelID, Class, Inst, flags, flags3D, pGrObject);
}


Expand Down Expand Up @@ -2474,7 +2473,7 @@ int pb_finished(void)
p=pb_push1(p,NV20_TCL_PRIMITIVE_3D_WAIT_MAKESPACE,0); //wait/makespace (obtains null status)
p=pb_push1(p,NV20_TCL_PRIMITIVE_3D_PARAMETER_A,pb_back_index); //set param=back buffer index to show up
p=pb_push1(p,NV20_TCL_PRIMITIVE_3D_FIRE_INTERRUPT,PB_FINISHED); //subprogID PB_FINISHED: gets frame ready to show up soon
// p=pb_push1(p,NV20_TCL_PRIMITIVE_3D_STALL_PIPELINE,0); //stall gpu pipeline (not sure it's needed in triple buffering technic)
p=pb_push1(p,NV20_TCL_PRIMITIVE_3D_STALL_PIPELINE,0); //stall gpu pipeline (not sure it's needed in triple buffering technic)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed?

pb_end(p);

//insert in push buffer the commands to trigger selection of next back buffer
Expand Down Expand Up @@ -2628,11 +2627,18 @@ void pb_kill(void)
}


void pb_set_color_format(unsigned int fmt, bool swizzled) {
//Records the color surface format in pb_ColorFmt.
//  fmt: presumably an NV097_SET_SURFACE_FORMAT_COLOR_* value (the initial
//       value of pb_ColorFmt is ..._LE_A8R8G8B8) - confirm against callers.
//  swizzled: must be false; any other value trips the assert below.
void pb_set_color_format(unsigned int fmt, bool swizzled)
{
pb_ColorFmt = fmt;
//only checked when asserts are enabled; the format is already stored by then
assert(swizzled == false);
}

void pb_set_fb_size_multiplier(unsigned int multiplier)
{
assert(multiplier > 0);
pb_FBSizeMultiplier = multiplier;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When would you use this?

}

int pb_init(void)
{
DWORD old;
Expand Down Expand Up @@ -2742,7 +2748,6 @@ int pb_init(void)

pb_FrameBuffersAddr=0;


pb_DmaBuffer8=MmAllocateContiguousMemoryEx(32,0,MAXRAM,0,4);
pb_DmaBuffer2=MmAllocateContiguousMemoryEx(32,0,MAXRAM,0,4);
pb_DmaBuffer7=MmAllocateContiguousMemoryEx(32,0,MAXRAM,0,4);
Expand Down Expand Up @@ -3109,6 +3114,7 @@ int pb_init(void)
pb_create_dma_ctx(8,DMA_CLASS_3D,(DWORD)pb_DmaBuffer8,0x20,&sDmaObject8);
pb_create_dma_ctx(6,DMA_CLASS_2,0,MAXRAM,&sDmaObject6);


//we initialized channel 0 first, that will match graphic context 0
pb_FifoChannelID=0;
pb_FifoChannelsMode=NV_PFIFO_MODE_ALL_PIO;
Expand Down Expand Up @@ -3253,14 +3259,14 @@ int pb_init(void)
//These commands assign DMA channels to push buffer subchannels
//and associate some specific GPU parts to specific Dma channels
p=pb_begin();
p=pb_push1_to(SUBCH_2,p,NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT,14);
p=pb_push1_to(SUBCH_3,p,NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT,16);
p=pb_push1_to(SUBCH_4,p,NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT,17);
p=pb_push1_to(SUBCH_3D,p,NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT,13);
p=pb_push1_to(SUBCH_2,p,NV20_TCL_PRIMITIVE_3D_SET_OBJECT0,7);
p=pb_push1_to(SUBCH_3,p,NV20_TCL_PRIMITIVE_3D_SET_OBJECT5,17);
p=pb_push1_to(SUBCH_3,p,NV20_TCL_PRIMITIVE_3D_SET_OBJECT_UNKNOWN,3);
p=pb_push2_to(SUBCH_4,p,NV20_TCL_PRIMITIVE_3D_SET_OBJECT1,3,11);
p=pb_push1_to(SUBCH_2,p,NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT,14); // Class 39
p=pb_push1_to(SUBCH_3,p,NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT,16); // Class 9F
p=pb_push1_to(SUBCH_4,p,NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT,17); // Class 62
p=pb_push1_to(SUBCH_3D,p,NV20_TCL_PRIMITIVE_SET_MAIN_OBJECT,13); // Class 97
p=pb_push1_to(SUBCH_2,p,NV20_TCL_PRIMITIVE_3D_SET_OBJECT0,7); // NV039_SET_CONTEXT_DMA_NOTIFIES
p=pb_push1_to(SUBCH_3,p,NV20_TCL_PRIMITIVE_3D_SET_OBJECT5,17); // NV09F_SET_CONTEXT_SURFACES
p=pb_push1_to(SUBCH_3,p,NV20_TCL_PRIMITIVE_3D_SET_OBJECT_UNKNOWN,3); // Set operation to SRCCOPY
p=pb_push2_to(SUBCH_4,p,NV20_TCL_PRIMITIVE_3D_SET_OBJECT1,3,11); // Source ch 3, Dest ch 11
pb_end(p); //calls pb_start() which will trigger the reading and sending to GPU (asynchronous, no waiting)

//setup needed for color computations
Expand Down Expand Up @@ -3393,7 +3399,7 @@ int pb_init(void)
}
}

Size=Pitch*VSize;
Size=Pitch*VSize*pb_FBSizeMultiplier;

//verify 64 bytes alignment for size of a frame buffer
if (Size&(64-1)) debugPrint("pb_init: FBSize is not well aligned.\n");
Expand Down Expand Up @@ -3466,7 +3472,7 @@ int pb_init(void)
}
}

Size=Pitch*VSize;
Size=Pitch*VSize*pb_FBSizeMultiplier;

//verify 64 bytes alignment for size of a frame buffer
if (Size&(64-1)) debugPrint("pb_init: DSSize is not well aligned.\n");
Expand Down Expand Up @@ -3520,7 +3526,7 @@ int pb_init(void)
}
}

Size=Pitch*VSize;
Size=Pitch*VSize*pb_FBSizeMultiplier;

//verify 64 bytes alignment for size of a frame buffer
if (Size&(64-1)) debugPrint("pb_init: EXSize is not well aligned.\n");
Expand Down Expand Up @@ -3728,3 +3734,18 @@ static NTAPI VOID pb_shutdown_notification_routine (PHAL_SHUTDOWN_REGISTRATION S
{
pb_kill();
}

//Returns the depth/stencil buffer address stored in pb_DepthStencilAddr,
//converted to a byte pointer.
//NOTE(review): whether this address can be dereferenced directly by the CPU
//is not visible here - confirm before reading or writing through it.
uint8_t *pb_depth_stencil_buffer(void)
{
    return (uint8_t *)pb_DepthStencilAddr;
}

//Returns the depth/stencil buffer pitch stored in pb_DepthStencilPitch
//(presumably bytes per row - confirm).
DWORD pb_depth_stencil_pitch(void)
{
    return pb_DepthStencilPitch;
}

//Returns the depth/stencil buffer size stored in pb_DSSize
//(presumably in bytes - confirm).
DWORD pb_depth_stencil_size(void)
{
    return pb_DSSize;
}
46 changes: 45 additions & 1 deletion lib/pbkit/pbkit.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,26 @@ extern "C"
#define SUBCH_2 2
#define SUBCH_3 3
#define SUBCH_4 4

#define NEXT_SUBCH 5

//DMA and graphics classes
#define DMA_CLASS_2 0x02
#define DMA_CLASS_3 0x03
#define DMA_CLASS_3D 0x3D
#define GR_CLASS_19 0x19
#define GR_CLASS_30 0x30
#define GR_CLASS_39 0x39
#define GR_CLASS_62 0x62
#define GR_CLASS_97 0x97
#define GR_CLASS_9F 0x9F

//Describes one context object (DMA or graphics) that lives in PRAMIN instance memory.
//Filled in by pb_create_dma_ctx, pb_create_gr_ctx and pb_create_gr_instance.
struct s_CtxDma
{
//Channel identifier supplied by the caller when the object is created
DWORD ChannelID;
DWORD Inst; //Addr in PRAMIN area, unit=16 bytes blocks, baseaddr=VIDEO_BASE+NV_PRAMIN
//Object class number (see the DMA_CLASS_* and GR_CLASS_* constants)
DWORD Class;
//Set to 1 for graphics objects; NOTE(review): presumably 0 for DMA objects - confirm
DWORD isGr;
};

void pb_show_front_screen(void); //shows scene (allows VBL synced screen swapping)
void pb_show_debug_screen(void); //shows debug screen (default openxdk+SDL buffer)
Expand Down Expand Up @@ -113,9 +132,34 @@ void pb_fill(int x,int y,int w,int h, DWORD color); //rectangle fill

void pb_set_viewport(int dwx,int dwy,int width,int height,float zmin,float zmax);

void pb_set_fb_size_multiplier(unsigned int multiplier);

int pb_busy(void);


//Creates a DMA context object of the given Class and describes it in *pDmaObject.
//NOTE(review): Base and Limit presumably give the start address and extent of
//the memory window the object covers - confirm against the implementation.
void pb_create_dma_ctx(DWORD ChannelID,
DWORD Class,
DWORD Base,
DWORD Limit,
struct s_CtxDma *pDmaObject);
//Creates a graphics context object of the given Class: reserves instance
//memory for it and fills *pGrObject.
void pb_create_gr_ctx(int ChannelID,
int Class,
struct s_CtxDma *pGrObject);
//NOTE(review): presumably makes a previously created object reachable through
//its ChannelID - confirm against the implementation.
void pb_bind_channel(struct s_CtxDma *pCtxDmaObject);

//Accessors for the depth/stencil buffer managed internally by pbkit:
//its address (as a byte pointer), its pitch and its size.
//Declared with (void) so that C callers get real prototypes and calls with
//arguments are rejected at compile time.
uint8_t *pb_depth_stencil_buffer(void);
DWORD pb_depth_stencil_pitch(void);
DWORD pb_depth_stencil_size(void);
Copy link
Member

@JayFoxRox JayFoxRox Mar 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I always thought pbkit had this interface backwards. Currently pbkit is a mix of a low-level and high-level driver.

pbkit can submit pushbuffers, but it also constructs them, some using rather high level operations.
Instead, I believe that pbkit should allow submission of pushbuffers, but it should primarily be the user's responsibility to construct them.
pbkit should only control the hardware so it can process commands from the pushbuffers.

More importantly, pbkit also manages buffers internally (sometimes like a singleton), but doesn't allow the user to manually create or control these buffers.
Instead, I believe that pbkit should provide the user with buffer creation / managing functions, but the user should always explicitly create these buffers and keep track of them.

This means I'd prefer to see something like pb_create_depth_stencil_buffer(size, pitch) and then the user should be responsible for creating these buffers.
So, in an ideal world, the application would never even have to query for the buffer address / pitch / size, because it created the buffer itself.

This (explicit buffer creation by application) is also required (or at least helpful) for more advanced applications which want to switch render-targets.


//Reserves instance memory for an object of 'size' bytes and returns the index
//of the first reserved block (instance indices count 16-byte blocks).
DWORD pb_reserve_instance(DWORD size);
//Initializes a graphics object at an already reserved instance: stores flags
//and flags3D in its first two DWORDs, zeroes the next two, and fills
//*pGrObject with ChannelID, Class, the instance index and isGr=1.
void pb_create_gr_instance(int ChannelID,
int Class,
DWORD instance,
DWORD flags,
DWORD flags3D,
struct s_CtxDma *pGrObject);

void pb_print_char(char c);
#ifdef __cplusplus
}
#endif
Expand Down