diff --git a/buildps3.bat b/buildps3.bat
index e2dfe1b..c72bed3 100644
--- a/buildps3.bat
+++ b/buildps3.bat
@@ -9,6 +9,7 @@ spu-lv2-gcc -O3 -c task_putlluc.c -o task_putlluc.o
 spu-lv2-gcc -O3 -c task_putllc.c -o task_putllc.o
 spu-lv2-gcc -O3 -c task_put.c -o task_put.o
 spu-lv2-gcc -O3 -c task_largeput.c -o task_largeput.o
+spu-lv2-gcc -O3 -c task_reacc.c -o task_reacc.o
 
 spu-lv2-gcc -O3 -mspurs-task task.o -o task.elf
 spu-lv2-gcc -O3 -mspurs-task task_pingpong.o -o task_pingpong.elf
@@ -20,6 +21,7 @@ spu-lv2-gcc -O3 -mspurs-task task_putlluc.o -ldma -o task_putlluc.elf
 spu-lv2-gcc -O3 -mspurs-task task_putllc.o -ldma -o task_putllc.elf
 spu-lv2-gcc -O3 -mspurs-task task_put.o -ldma -o task_put.elf
 spu-lv2-gcc -O3 -mspurs-task task_largeput.o -ldma -o task_largeput.elf
+spu-lv2-gcc -O3 -mspurs-task task_reacc.o -o task_reacc.elf
 
 spu_elf-to-ppu_obj task.elf task.ppu.o
 spu_elf-to-ppu_obj task_pingpong.elf task_pingpong.ppu.o
@@ -31,6 +33,7 @@ spu_elf-to-ppu_obj task_putlluc.elf task_putlluc.ppu.o
 spu_elf-to-ppu_obj task_putllc.elf task_putllc.ppu.o
 spu_elf-to-ppu_obj task_put.elf task_put.ppu.o
 spu_elf-to-ppu_obj task_largeput.elf task_largeput.ppu.o
+spu_elf-to-ppu_obj task_reacc.elf task_reacc.ppu.o
 
 ppu-lv2-gcc -O3 -c spurs_helpers.c -o spurs_helpers.o
 ppu-lv2-gcc -O3 -c test_avalanche.c -o test_avalanche.o
@@ -43,5 +46,5 @@ ppu-lv2-gcc -O3 -c test_largeblock.c -o test_largeblock.o
 
 ppu-lv2-gcc -O3 -c main.c -o main.o
 
-ppu-lv2-gcc -O3 spurs_helpers.o test_avalanche.o test_pingpong.o test_mfc64.o test_spu_inst.o test_spinlock.o test_block.o test_largeblock.o main.o task.ppu.o task_pingpong.ppu.o task_mfc64.ppu.o task_spuint.ppu.o task_spufloat.ppu.o task_spuspinlock.ppu.o task_putlluc.ppu.o task_putllc.ppu.o task_put.ppu.o task_largeput.ppu.o -lsysmodule_stub -lspurs_stub -o spurs_test.elf
+ppu-lv2-gcc -O3 spurs_helpers.o test_avalanche.o test_pingpong.o test_mfc64.o test_spu_inst.o test_spinlock.o test_block.o test_largeblock.o main.o task.ppu.o task_pingpong.ppu.o task_mfc64.ppu.o task_spuint.ppu.o task_spufloat.ppu.o task_spuspinlock.ppu.o task_putlluc.ppu.o task_putllc.ppu.o task_put.ppu.o task_largeput.ppu.o task_reacc.ppu.o -lsysmodule_stub -lspurs_stub -o spurs_test.elf
 make_fself spurs_test.elf spurs_test.self
diff --git a/main.c b/main.c
index f460dd4..7cbfdf4 100644
--- a/main.c
+++ b/main.c
@@ -28,7 +28,7 @@
         printf("%s completed in %llu ms (PS3: %u ms)\n", name, (time2 - time1), reference); \
     }
 
-#define NUM_TESTS 11
+#define NUM_TESTS 12 /* sizes arg_conv[]; raised by one for the new TEST_SPUREACC entry */
 
 enum tests
 {
@@ -43,6 +43,7 @@ enum tests
     TEST_SPUPUTLLC,
     TEST_SPUPUT,
     TEST_SPULARGEPUT,
+    TEST_SPUREACC, /* index into tests_to_run[] for the task_reacc.c benchmark */
 };
 
 #define AVALANCHE_NAME "SPU Task Avalanche"
@@ -56,6 +57,7 @@ enum tests
 #define SPUPUTLLC_NAME "PUTLLC Perf"
 #define SPUPUT_NAME "PUT Perf"
 #define SPULARGEPUT_NAME "Large PUT Perf"
+#define SPUREACC_NAME "SPU Reciprocal Accurate" /* name passed to DO_A_TEST for the new test */
 
 typedef struct
 {
@@ -75,7 +77,8 @@ const arg_test arg_conv[NUM_TESTS] = {
     {'U', TEST_SPUPUTLLUC, SPUPUTLLUC_NAME},
     {'T', TEST_SPUPUTLLC, SPUPUTLLC_NAME},
     {'R', TEST_SPUPUT, SPUPUT_NAME},
-    {'G', TEST_SPULARGEPUT, SPULARGEPUT_NAME}};
+    {'G', TEST_SPULARGEPUT, SPULARGEPUT_NAME},
+    {'E', TEST_SPUREACC, SPUREACC_NAME}}; /* presumably 'E' is the selector letter for this test - confirm against the argument parser */
 
 extern const CellSpursTaskBinInfo _binary_task_task_spuint_elf_taskbininfo;
 extern const CellSpursTaskBinInfo _binary_task_task_spufloat_elf_taskbininfo;
@@ -85,6 +88,8 @@ extern const CellSpursTaskBinInfo _binary_task_task_putllc_elf_taskbininfo;
 extern const CellSpursTaskBinInfo _binary_task_task_put_elf_taskbininfo;
 extern const CellSpursTaskBinInfo _binary_task_task_largeput_elf_taskbininfo;
 
+extern const CellSpursTaskBinInfo _binary_task_task_reacc_elf_taskbininfo; /* presumably emitted by spu_elf-to-ppu_obj for task_reacc.elf - confirm */
+
 bool verbose = false;
 
 uint64_t get_time()
@@ -97,7 +102,7 @@ uint64_t get_time()
 
 int main(int argc, char *argv[])
 {
-    printf("SPU Test v1.1.2 by GalCiv\n");
+    printf("SPU Test v1.2.1 by GalCiv\n"); /* version banner bumped with this change */
 
     unsigned int seed = 12345678;
     unsigned int repeat = 1;
@@ -221,9 +226,11 @@ int main(int argc, char *argv[])
         if (tests_to_run[TEST_SPUPUTLLC])
             DO_A_TEST(SPUPUTLLC_NAME, test_block(spurs2, &_binary_task_task_putllc_elf_taskbininfo), 3364);
         if (tests_to_run[TEST_SPUPUT])
-            DO_A_TEST(SPUPUT_NAME, test_block(spurs2, &_binary_task_task_put_elf_taskbininfo), 3984);
+            DO_A_TEST(SPUPUT_NAME, test_block(spurs2, &_binary_task_task_put_elf_taskbininfo), 4567); /* last argument is the reference time printed as "PS3: %u ms" */
         if (tests_to_run[TEST_SPULARGEPUT])
-            DO_A_TEST(SPULARGEPUT_NAME, test_largeblock(spurs2, &_binary_task_task_largeput_elf_taskbininfo), 4454);
+            DO_A_TEST(SPULARGEPUT_NAME, test_largeblock(spurs2, &_binary_task_task_largeput_elf_taskbininfo), 4423); /* updated reference time in ms */
+        if (tests_to_run[TEST_SPUREACC]) /* new test: run only when selected */
+            DO_A_TEST(SPUREACC_NAME, test_spu_inst(spurs2, &_binary_task_task_reacc_elf_taskbininfo), 6125); /* uses the test_spu_inst driver; 6125 is the reference time in ms */
     }
 
     timeend = get_time();
diff --git a/task_reacc.c b/task_reacc.c
new file mode 100644
index 0000000..7ef5ca4
--- /dev/null
+++ b/task_reacc.c
@@ -0,0 +1,27 @@
+#include /* NOTE(review): the header name is missing on all four #include lines in this copy of the patch - restore them from the original file */
+#include
+#include
+#include
+
+CELL_SPU_LS_PARAM(16 * 1024, 16 * 1024); /* presumably declares the task's local-store requirements - confirm against the SPURS SDK */
+
+#define NUM_REACC 800000000ull /* number of iterations of the loop in cellSpursTaskMain */
+
+int cellSpursTaskMain(qword argTask, uint64_t argTaskset) /* task entry: runs the frest/fi/fnms/fma chain NUM_REACC times, seeded from argTask */
+{
+    (void)argTaskset; /* unused; the cast marks that as deliberate */
+
+    vec_float4 values = (vec_float4)argTask; /* seed value: the task argument viewed as a vec_float4 */
+    vec_float4 the_ones = {1.0f, 1.0f, 1.0f, 1.0f}; /* 1.0 in every lane; third operand of the fnms below */
+
+    for (uint64_t counter = 0; counter < NUM_REACC; counter++) /* every pass consumes the previous pass's result, so iterations depend on each other */
+    {
+        vec_float4 frest_result = __builtin_spu_frest((vec_float4)values); /* presumably the SPU reciprocal-estimate instruction - confirm in the SPU ISA */
+        qword fi_result = __builtin_si_fi((qword)values, (qword)frest_result); /* presumably the interpolate step that completes the estimate - confirm */
+        qword fnms_result = __builtin_si_fnms((qword)values, (qword)fi_result, (qword)the_ones); /* presumably the_ones - values * fi_result - confirm operand order in the SPU ISA */
+        qword fma_result = __builtin_si_fma((qword)fnms_result, (qword)fi_result, (qword)fi_result); /* presumably fnms_result * fi_result + fi_result, one refinement step */
+        values = (vec_float4)fma_result; /* the result becomes the next iteration's input */
+    }
+
+    return si_to_int((qword)values); /* the final vector feeds the return value, so the loop's result is used */
+}