From 7e082f0c4f6b34820b4dbf6285d05adf423c091b Mon Sep 17 00:00:00 2001 From: Tharo <17233964+Thar0@users.noreply.github.com> Date: Sat, 28 Dec 2024 20:18:45 +0000 Subject: [PATCH] Use IDO for assembling handwritten asm files in src (#2390) * as0 * Fix ASOPTFLAGS for src/libc, remove unnecessary noreorder region in kanread * Suggested changes * Use %half to load the boot bss size for matching Co-authored-by: cadmic * Wrap all of __osProbeTLB in noreorder --------- Co-authored-by: cadmic --- Makefile | 89 +- include/ultra64/R4300.h | 458 ++++----- include/ultra64/asm.h | 91 +- include/ultra64/exception.h | 8 +- include/ultra64/message.h | 2 +- include/ultra64/rcp.h | 402 ++++---- include/ultra64/regdef.h | 164 ++++ include/ultra64/thread.h | 2 +- src/boot/mio0.s | 92 +- src/code/kanread.s | 125 ++- src/code/z_vimode.c | 2 +- src/libc/absf.s | 14 +- src/libc/sqrt.s | 14 +- src/libc64/fp.s | 163 ++-- src/libleo/api/getaadr.s | 59 +- src/libleo/api/getaadr2.s | 48 +- src/libleo/api/getkadr.s | 135 ++- src/libultra/gu/libm_vals.s | 4 +- src/libultra/io/vimodefpallan1.c | 18 +- src/libultra/io/vimodempallan1.c | 18 +- src/libultra/io/vimodentsclan1.c | 18 +- src/libultra/io/vimodepallan1.c | 18 +- src/libultra/libc/bcmp.s | 148 ++- src/libultra/libc/bcopy.s | 366 ++++---- src/libultra/libc/bzero.s | 100 +- src/libultra/mgu/mtxf2l.s | 58 +- src/libultra/mgu/mtxident.s | 46 +- src/libultra/mgu/mtxidentf.s | 44 +- src/libultra/mgu/mtxl2f.s | 66 +- src/libultra/mgu/normalize.s | 48 +- src/libultra/mgu/scale.s | 98 +- src/libultra/mgu/translate.s | 119 +-- src/libultra/os/exceptasm.s | 1292 ++++++++++++-------------- src/libultra/os/getcause.s | 12 +- src/libultra/os/getcount.s | 12 +- src/libultra/os/getfpccsr.s | 12 +- src/libultra/os/getintmask.s | 69 +- src/libultra/os/getsr.s | 12 +- src/libultra/os/interrupt.s | 76 +- src/libultra/os/invaldcache.s | 110 +-- src/libultra/os/invalicache.s | 66 +- src/libultra/os/maptlbrdb.s | 53 +- src/libultra/os/parameters.s | 41 +- 
src/libultra/os/probetlb.s | 99 +- src/libultra/os/setcompare.s | 12 +- src/libultra/os/setfpccsr.s | 14 +- src/libultra/os/setintmask.s | 106 +-- src/libultra/os/setsr.s | 14 +- src/libultra/os/setwatchlo.s | 14 +- src/libultra/os/unmaptlball.s | 40 +- src/libultra/os/writebackdcache.s | 84 +- src/libultra/os/writebackdcacheall.s | 28 +- src/makerom/entry.s | 58 +- src/makerom/ipl3.s | 4 - 54 files changed, 2612 insertions(+), 2653 deletions(-) create mode 100644 include/ultra64/regdef.h delete mode 100644 src/makerom/ipl3.s diff --git a/Makefile b/Makefile index b233f84882..f805b08be1 100644 --- a/Makefile +++ b/Makefile @@ -41,6 +41,8 @@ VERSION ?= gc-eu-mq-dbg N_THREADS ?= $(shell nproc) # Check code syntax with host compiler. RUN_CC_CHECK ?= 1 +# If DEBUG_OBJECTS is 1, produce additional debugging files such as objdump output or raw binaries for assets +DEBUG_OBJECTS ?= 0 # Set prefix to mips binutils binaries (mips-linux-gnu-ld => 'mips-linux-gnu-') - Change at your own risk! # In nearly all cases, not having 'mips-linux-gnu-*' binaries on the PATH indicates missing dependencies. MIPS_BINUTILS_PREFIX ?= mips-linux-gnu- @@ -56,6 +58,7 @@ N64_EMULATOR ?= # DEBUG_FEATURES ?= 1 CFLAGS ?= +CCASFLAGS ?= CPPFLAGS ?= CPP_DEFINES ?= @@ -239,6 +242,7 @@ else CPP_DEFINES += -DDEBUG_FEATURES=0 -DNDEBUG OPTFLAGS := -O2 -g3 endif +ASOPTFLAGS := -O1 ifeq ($(OS),Windows_NT) DETECTED_OS=windows @@ -261,9 +265,11 @@ endif # Detect compiler and set variables appropriately. ifeq ($(COMPILER),gcc) CC := $(MIPS_BINUTILS_PREFIX)gcc + CCAS := $(CC) -x assembler-with-cpp else ifeq ($(COMPILER),ido) CC := tools/ido_recomp/$(DETECTED_OS)/7.1/cc CC_OLD := tools/ido_recomp/$(DETECTED_OS)/5.3/cc + CCAS := $(CC_OLD) else $(error Unsupported compiler. Please use either ido or gcc as the COMPILER variable.) 
endif @@ -276,8 +282,9 @@ ifeq ($(ORIG_COMPILER),1) $(error Please install qemu-irix package or set QEMU_IRIX env var to the full qemu-irix binary path) endif endif - CC = $(QEMU_IRIX) -L tools/ido7.1_compiler tools/ido7.1_compiler/usr/bin/cc - CC_OLD = $(QEMU_IRIX) -L tools/ido5.3_compiler tools/ido5.3_compiler/usr/bin/cc + CC := $(QEMU_IRIX) -L tools/ido7.1_compiler tools/ido7.1_compiler/usr/bin/cc + CC_OLD := $(QEMU_IRIX) -L tools/ido5.3_compiler tools/ido5.3_compiler/usr/bin/cc + CCAS := $(CC) endif AS := $(MIPS_BINUTILS_PREFIX)as @@ -285,6 +292,7 @@ LD := $(MIPS_BINUTILS_PREFIX)ld OBJCOPY := $(MIPS_BINUTILS_PREFIX)objcopy OBJDUMP := $(MIPS_BINUTILS_PREFIX)objdump NM := $(MIPS_BINUTILS_PREFIX)nm +STRIP := $(MIPS_BINUTILS_PREFIX)strip # The default iconv on macOS has some differences from GNU iconv, so we use the Homebrew version instead ifeq ($(UNAME_S),Darwin) @@ -328,8 +336,18 @@ SBCFLAGS := --matching SFCFLAGS := --matching CFLAGS += $(CPP_DEFINES) +CCASFLAGS := $(CPP_DEFINES) CPPFLAGS += $(CPP_DEFINES) +# Extra debugging steps +ifeq ($(DEBUG_OBJECTS),1) + OBJDUMP_CMD = @$(OBJDUMP) $(OBJDUMP_FLAGS) $@ > $(@:.o=.s) + OBJCOPY_CMD = @$(OBJCOPY) -O binary $@ $(@:.o=.bin) +else + OBJDUMP_CMD = @: + OBJCOPY_CMD = @: +endif + ifeq ($(COMPILER),gcc) OPTFLAGS := -Os -ffast-math -fno-unsafe-math-optimizations endif @@ -348,12 +366,14 @@ ASFLAGS := -march=vr4300 -32 -no-pad-sections -Iinclude -I$(EXTRACTED_DIR) ifeq ($(COMPILER),gcc) CFLAGS += -G 0 -nostdinc $(INC) -march=vr4300 -mfix4300 -mabi=32 -mno-abicalls -mdivide-breaks -fno-PIC -fno-common -ffreestanding -fbuiltin -fno-builtin-sinf -fno-builtin-cosf $(CHECK_WARNINGS) -funsigned-char + CCASFLAGS += -G 0 -nostdinc $(INC) -march=vr4300 -mfix4300 -mabi=32 -mno-abicalls -fno-PIC -fno-common -Wa,-no-pad-sections MIPS_VERSION := -mips3 else # Suppress warnings for wrong number of macro arguments (to fake variadic # macros) and Microsoft extensions such as anonymous structs (which the # compiler does support but warns 
for their usage). CFLAGS += -G 0 -non_shared -fullwarn -verbose -Xcpluscomm $(INC) -Wab,-r4300_mul -woff 516,609,649,838,712,807 + CCASFLAGS += -G 0 -non_shared -fullwarn -verbose -Xcpluscomm $(INC) -Wab,-r4300_mul -woff 516,609,649,838,712,807 -o32 MIPS_VERSION := -mips2 endif @@ -482,7 +502,8 @@ O_FILES := $(foreach f,$(S_FILES:.s=.o),$(BUILD_DIR)/$f) \ $(foreach f,$(ASSET_C_FILES_EXTRACTED:.c=.o),$(f:$(EXTRACTED_DIR)/%=$(BUILD_DIR)/%)) \ $(foreach f,$(ASSET_C_FILES_COMMITTED:.c=.o),$(BUILD_DIR)/$f) \ $(foreach f,$(BASEROM_BIN_FILES),$(BUILD_DIR)/baserom/$(notdir $f).o) \ - $(BUILD_DIR)/src/code/z_message_z_game_over.o + $(BUILD_DIR)/src/code/z_message_z_game_over.o \ + $(BUILD_DIR)/src/makerom/ipl3.o OVL_RELOC_FILES := $(shell $(CPP) $(CPPFLAGS) $(SPEC) | $(BUILD_DIR_REPLACE) | grep -o '[^"]*_reloc.o' ) @@ -545,8 +566,10 @@ $(BUILD_DIR)/src/code/jpegdecoder.o: CC := $(CC_OLD) ifeq ($(DEBUG_FEATURES),1) $(BUILD_DIR)/src/libc/%.o: OPTFLAGS := -g +$(BUILD_DIR)/src/libc/%.o: ASOPTFLAGS := -g else $(BUILD_DIR)/src/libc/%.o: OPTFLAGS := -O2 +$(BUILD_DIR)/src/libc/%.o: ASOPTFLAGS := -O2 endif $(BUILD_DIR)/src/libc64/%.o: OPTFLAGS := -O2 @@ -573,6 +596,13 @@ $(BUILD_DIR)/src/libultra/libc/ll.o: MIPS_VERSION := -mips3 -32 $(BUILD_DIR)/src/libultra/libc/llcvt.o: OPTFLAGS := -O1 $(BUILD_DIR)/src/libultra/libc/llcvt.o: MIPS_VERSION := -mips3 -32 +$(BUILD_DIR)/src/libultra/os/exceptasm.o: MIPS_VERSION := -mips3 -32 + +$(BUILD_DIR)/src/code/%.o: ASOPTFLAGS := -O2 +$(BUILD_DIR)/src/libleo/%.o: ASOPTFLAGS := -O2 +$(BUILD_DIR)/src/libultra/libc/%.o: ASOPTFLAGS := -O2 +$(BUILD_DIR)/src/libultra/mgu/%.o: ASOPTFLAGS := -O2 + ifeq ($(LIBULTRA_VERSION),I) $(BUILD_DIR)/src/libultra/gu/%.o: OPTFLAGS := -O3 $(BUILD_DIR)/src/libultra/io/%.o: OPTFLAGS := -O1 @@ -636,6 +666,11 @@ $(BUILD_DIR)/src/libultra/libc/ll.o: OPTFLAGS := -Ofast $(BUILD_DIR)/src/overlays/%.o: CFLAGS += -fno-merge-constants -mno-explicit-relocs -mno-split-addresses endif +SET_ABI_BIT = @: 
+$(BUILD_DIR)/src/libultra/os/exceptasm.o: SET_ABI_BIT = $(PYTHON) tools/set_o32abi_bit.py $@ +$(BUILD_DIR)/src/libultra/libc/ll.o: SET_ABI_BIT = $(PYTHON) tools/set_o32abi_bit.py $@ +$(BUILD_DIR)/src/libultra/libc/llcvt.o: SET_ABI_BIT = $(PYTHON) tools/set_o32abi_bit.py $@ + #### Main Targets ### all: rom compress @@ -783,14 +818,37 @@ endif $(BUILD_DIR)/assets/%.o: assets/%.c $(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $@ $< - $(OBJCOPY) -O binary $@ $@.bin + $(OBJCOPY_CMD) $(BUILD_DIR)/assets/%.o: $(EXTRACTED_DIR)/assets/%.c $(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $@ $< - $(OBJCOPY) -O binary $@ $@.bin + $(OBJCOPY_CMD) + +# Assemble the ROM header with GNU AS always +$(BUILD_DIR)/src/makerom/rom_header.o: src/makerom/rom_header.s +ifeq ($(COMPILER),ido) + $(CPP) $(CPPFLAGS) $(MIPS_BUILTIN_DEFS) $(INC) $< | $(AS) $(ASFLAGS) -o $@ +else + $(CCAS) -c $(CCASFLAGS) $(MIPS_VERSION) $(ASOPTFLAGS) -o $@ $< +endif + $(OBJDUMP_CMD) + +$(BUILD_DIR)/src/makerom/ipl3.o: $(EXTRACTED_DIR)/incbin/ipl3 + $(OBJCOPY) -I binary -O elf32-big --rename-section .data=.text $< $@ $(BUILD_DIR)/src/%.o: src/%.s - $(CPP) $(CPPFLAGS) -Iinclude $< | $(AS) $(ASFLAGS) -o $@ +ifeq ($(COMPILER),ido) + $(CCAS) -c $(CCASFLAGS) $(MIPS_VERSION) $(ASOPTFLAGS) -o $(@:.o=.tmp.o) $< +# IDO generates bad symbol tables, fix the symbol table with strip.. 
+ $(STRIP) $(@:.o=.tmp.o) -N dummy-symbol-name +# but strip doesn't know about file-relative offsets in .mdebug and doesn't relocate them, ld will +# segfault unless .mdebug is removed + $(OBJCOPY) --remove-section .mdebug $(@:.o=.tmp.o) $@ + $(SET_ABI_BIT) +else + $(CCAS) -c $(CCASFLAGS) $(MIPS_VERSION) $(ASOPTFLAGS) -o $@ $< +endif + $(OBJDUMP_CMD) # Incremental link to move z_message and z_game_over data into rodata $(BUILD_DIR)/src/code/z_message_z_game_over.o: $(BUILD_DIR)/src/code/z_message.o $(BUILD_DIR)/src/code/z_game_over.o @@ -820,7 +878,8 @@ ifneq ($(RUN_CC_CHECK),0) $(CC_CHECK) $< endif $(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $@ $< - @$(OBJDUMP) $(OBJDUMP_FLAGS) $@ > $(@:.o=.s) + $(SET_ABI_BIT) + $(OBJDUMP_CMD) $(BUILD_DIR)/src/audio/session_init.o: src/audio/session_init.c $(BUILD_DIR)/assets/audio/soundfont_sizes.h $(BUILD_DIR)/assets/audio/sequence_sizes.h ifneq ($(RUN_CC_CHECK),0) @@ -830,22 +889,6 @@ endif $(LD) -r -T linker_scripts/data_with_rodata.ld -o $@ $(@:.o=.tmp) @$(OBJDUMP) $(OBJDUMP_FLAGS) $@ > $(@:.o=.s) -$(BUILD_DIR)/src/libultra/libc/ll.o: src/libultra/libc/ll.c -ifneq ($(RUN_CC_CHECK),0) - $(CC_CHECK) $< -endif - $(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $@ $< - $(PYTHON) tools/set_o32abi_bit.py $@ - @$(OBJDUMP) $(OBJDUMP_FLAGS) $@ > $(@:.o=.s) - -$(BUILD_DIR)/src/libultra/libc/llcvt.o: src/libultra/libc/llcvt.c -ifneq ($(RUN_CC_CHECK),0) - $(CC_CHECK) $< -endif - $(CC) -c $(CFLAGS) $(MIPS_VERSION) $(OPTFLAGS) -o $@ $< - $(PYTHON) tools/set_o32abi_bit.py $@ - @$(OBJDUMP) $(OBJDUMP_FLAGS) $@ > $(@:.o=.s) - $(BUILD_DIR)/src/overlays/%_reloc.o: $(BUILD_DIR)/$(SPEC) $(FADO) $$(tools/reloc_prereq $< $(notdir $*)) -n $(notdir $*) -o $(@:.o=.s) -M $(@:.o=.d) $(AS) $(ASFLAGS) $(@:.o=.s) -o $@ diff --git a/include/ultra64/R4300.h b/include/ultra64/R4300.h index 6398ea17c4..be0dfab570 100644 --- a/include/ultra64/R4300.h +++ b/include/ultra64/R4300.h @@ -10,7 +10,7 @@ #define C_REG(x) $x #endif -// Segment base addresses and 
sizes +/* Segment base addresses and sizes */ #define KUBASE 0 #define KUSIZE 0x80000000 #define K0BASE 0x80000000 @@ -20,24 +20,24 @@ #define K2BASE 0xC0000000 #define K2SIZE 0x20000000 -// Exception vectors -#define SIZE_EXCVEC 0x80 // Size of an exc. vec -#define UT_VEC K0BASE // utlbmiss vector -#define R_VEC (K1BASE + 0x1FC00000) // reset vector -#define XUT_VEC (K0BASE + 0x80) // extended address tlbmiss -#define ECC_VEC (K0BASE + 0x100) // Ecc exception vector -#define E_VEC (K0BASE + 0x180) // Gen. exception vector +/* Exception vectors */ +#define SIZE_EXCVEC 0x80 /* Size of an exc. vec */ +#define UT_VEC K0BASE /* utlbmiss vector */ +#define R_VEC (K1BASE + 0x1FC00000) /* reset vector */ +#define XUT_VEC (K0BASE + 0x80) /* extended address tlbmiss */ +#define ECC_VEC (K0BASE + 0x100) /* Ecc exception vector */ +#define E_VEC (K0BASE + 0x180) /* Gen. exception vector */ -// Address conversion macros -#define K0_TO_K1(x) (U32(x) | 0xA0000000) // kseg0 to kseg1 -#define K1_TO_K0(x) (U32(x) & 0x9FFFFFFF) // kseg1 to kseg0 -#define K0_TO_PHYS(x) (U32(x) & 0x1FFFFFFF) // kseg0 to physical -#define K1_TO_PHYS(x) (U32(x) & 0x1FFFFFFF) // kseg1 to physical -#define KDM_TO_PHYS(x) (U32(x) & 0x1FFFFFFF) // direct mapped to physical -#define PHYS_TO_K0(x) (U32(x) | 0x80000000) // physical to kseg0 -#define PHYS_TO_K1(x) (U32(x) | 0xA0000000) // physical to kseg1 +/* Address conversion macros */ +#define K0_TO_K1(x) (U32(x) | 0xA0000000) /* kseg0 to kseg1 */ +#define K1_TO_K0(x) (U32(x) & 0x9FFFFFFF) /* kseg1 to kseg0 */ +#define K0_TO_PHYS(x) (U32(x) & 0x1FFFFFFF) /* kseg0 to physical */ +#define K1_TO_PHYS(x) (U32(x) & 0x1FFFFFFF) /* kseg1 to physical */ +#define KDM_TO_PHYS(x) (U32(x) & 0x1FFFFFFF) /* direct mapped to physical */ +#define PHYS_TO_K0(x) (U32(x) | 0x80000000) /* physical to kseg0 */ +#define PHYS_TO_K1(x) (U32(x) | 0xA0000000) /* physical to kseg1 */ -// Address predicates +/* Address predicates */ #define IS_KSEG0(x) (U32(x) >= K0BASE && U32(x) < 
K1BASE) #define IS_KSEG1(x) (U32(x) >= K1BASE && U32(x) < K2BASE) #define IS_KSEGDM(x) (U32(x) >= K0BASE && U32(x) < K2BASE) @@ -45,25 +45,25 @@ #define IS_KPTESEG(x) (U32(x) >= KPTE_SHDUBASE) #define IS_KUSEG(x) (U32(x) < K0BASE) -// TLB size constants +/* TLB size constants */ #define NTLBENTRIES 31 /* entry 31 is reserved by rdb */ #define TLBHI_VPN2MASK 0xFFFFE000 #define TLBHI_VPN2SHIFT 13 #define TLBHI_PIDMASK 0xFF #define TLBHI_PIDSHIFT 0 -#define TLBHI_NPID 255 // 255 to fit in 8 bits +#define TLBHI_NPID 255 /* 255 to fit in 8 bits */ #define TLBLO_PFNMASK 0x3FFFFFC0 #define TLBLO_PFNSHIFT 6 -#define TLBLO_CACHMASK 0x38 // cache coherency algorithm +#define TLBLO_CACHMASK 0x38 /* cache coherency algorithm */ #define TLBLO_CACHSHIFT 3 -#define TLBLO_UNCACHED 0x10 // not cached -#define TLBLO_NONCOHRNT 0x18 // Cacheable non-coherent -#define TLBLO_EXLWR 0x28 // Exclusive write -#define TLBLO_D 0x4 // writeable -#define TLBLO_V 0x2 // valid bit -#define TLBLO_G 0x1 // global access bit +#define TLBLO_UNCACHED 0x10 /* not cached */ +#define TLBLO_NONCOHRNT 0x18 /* Cacheable non-coherent */ +#define TLBLO_EXLWR 0x28 /* Exclusive write */ +#define TLBLO_D 0x4 /* writeable */ +#define TLBLO_V 0x2 /* valid bit */ +#define TLBLO_G 0x1 /* global access bit */ #define TLBINX_PROBE 0x80000000 #define TLBINX_INXMASK 0x3F @@ -88,104 +88,104 @@ /* * Status register */ -#define SR_CUMASK 0xF0000000 // coproc usable bits +#define SR_CUMASK 0xF0000000 /* coproc usable bits */ -#define SR_CU3 0x80000000 // Coprocessor 3 usable -#define SR_CU2 0x40000000 // Coprocessor 2 usable -#define SR_CU1 0x20000000 // Coprocessor 1 usable -#define SR_CU0 0x10000000 // Coprocessor 0 usable -#define SR_RP 0x08000000 // Reduced power (quarter speed) -#define SR_FR 0x04000000 // MIPS III FP register mode -#define SR_RE 0x02000000 // Reverse endian -#define SR_ITS 0x01000000 // Instruction trace support -#define SR_BEV 0x00400000 // Use boot exception vectors -#define SR_TS 0x00200000 // TLB 
shutdown -#define SR_SR 0x00100000 // Soft reset occured -#define SR_CH 0x00040000 // Cache hit for last 'cache' op -#define SR_CE 0x00020000 // Create ECC -#define SR_DE 0x00010000 // ECC of parity does not cause error +#define SR_CU3 0x80000000 /* Coprocessor 3 usable */ +#define SR_CU2 0x40000000 /* Coprocessor 2 usable */ +#define SR_CU1 0x20000000 /* Coprocessor 1 usable */ +#define SR_CU0 0x10000000 /* Coprocessor 0 usable */ +#define SR_RP 0x08000000 /* Reduced power (quarter speed) */ +#define SR_FR 0x04000000 /* MIPS III FP register mode */ +#define SR_RE 0x02000000 /* Reverse endian */ +#define SR_ITS 0x01000000 /* Instruction trace support */ +#define SR_BEV 0x00400000 /* Use boot exception vectors */ +#define SR_TS 0x00200000 /* TLB shutdown */ +#define SR_SR 0x00100000 /* Soft reset occured */ +#define SR_CH 0x00040000 /* Cache hit for last 'cache' op */ +#define SR_CE 0x00020000 /* Create ECC */ +#define SR_DE 0x00010000 /* ECC of parity does not cause error */ -// Interrupt enable bits -// (NOTE: bits set to 1 enable the corresponding level interrupt) -#define SR_IMASK 0x0000FF00 // Interrupt mask -#define SR_IMASK8 0x00000000 // mask level 8 -#define SR_IMASK7 0x00008000 // mask level 7 -#define SR_IMASK6 0x0000C000 // mask level 6 -#define SR_IMASK5 0x0000E000 // mask level 5 -#define SR_IMASK4 0x0000F000 // mask level 4 -#define SR_IMASK3 0x0000F800 // mask level 3 -#define SR_IMASK2 0x0000FC00 // mask level 2 -#define SR_IMASK1 0x0000FE00 // mask level 1 -#define SR_IMASK0 0x0000FF00 // mask level 0 +/* Interrupt enable bits */ +/* (NOTE: bits set to 1 enable the corresponding level interrupt) */ +#define SR_IMASK 0x0000FF00 /* Interrupt mask */ +#define SR_IMASK8 0x00000000 /* mask level 8 */ +#define SR_IMASK7 0x00008000 /* mask level 7 */ +#define SR_IMASK6 0x0000C000 /* mask level 6 */ +#define SR_IMASK5 0x0000E000 /* mask level 5 */ +#define SR_IMASK4 0x0000F000 /* mask level 4 */ +#define SR_IMASK3 0x0000F800 /* mask level 3 */ +#define 
SR_IMASK2 0x0000FC00 /* mask level 2 */ +#define SR_IMASK1 0x0000FE00 /* mask level 1 */ +#define SR_IMASK0 0x0000FF00 /* mask level 0 */ -#define SR_IBIT8 0x00008000 // bit level 8 -#define SR_IBIT7 0x00004000 // bit level 7 -#define SR_IBIT6 0x00002000 // bit level 6 -#define SR_IBIT5 0x00001000 // bit level 5 -#define SR_IBIT4 0x00000800 // bit level 4 -#define SR_IBIT3 0x00000400 // bit level 3 -#define SR_IBIT2 0x00000200 // bit level 2 -#define SR_IBIT1 0x00000100 // bit level 1 +#define SR_IBIT8 0x00008000 /* bit level 8 */ +#define SR_IBIT7 0x00004000 /* bit level 7 */ +#define SR_IBIT6 0x00002000 /* bit level 6 */ +#define SR_IBIT5 0x00001000 /* bit level 5 */ +#define SR_IBIT4 0x00000800 /* bit level 4 */ +#define SR_IBIT3 0x00000400 /* bit level 3 */ +#define SR_IBIT2 0x00000200 /* bit level 2 */ +#define SR_IBIT1 0x00000100 /* bit level 1 */ #define SR_IMASKSHIFT 8 -#define SR_KX 0x00000080 // extended-addr TLB vec in kernel -#define SR_SX 0x00000040 // xtended-addr TLB vec supervisor -#define SR_UX 0x00000020 // xtended-addr TLB vec in user mode -#define SR_KSU_MASK 0x00000018 // mode mask -#define SR_KSU_USR 0x00000010 // user mode -#define SR_KSU_SUP 0x00000008 // supervisor mode -#define SR_KSU_KER 0x00000000 // kernel mode -#define SR_ERL 0x00000004 // Error level, 1=>cache error -#define SR_EXL 0x00000002 // Exception level, 1=>exception -#define SR_IE 0x00000001 // interrupt enable, 1=>enable +#define SR_KX 0x00000080 /* extended-addr TLB vec in kernel */ +#define SR_SX 0x00000040 /* xtended-addr TLB vec supervisor */ +#define SR_UX 0x00000020 /* xtended-addr TLB vec in user mode */ +#define SR_KSU_MASK 0x00000018 /* mode mask */ +#define SR_KSU_USR 0x00000010 /* user mode */ +#define SR_KSU_SUP 0x00000008 /* supervisor mode */ +#define SR_KSU_KER 0x00000000 /* kernel mode */ +#define SR_ERL 0x00000004 /* Error level, 1=>cache error */ +#define SR_EXL 0x00000002 /* Exception level, 1=>exception */ +#define SR_IE 0x00000001 /* interrupt enable, 
1=>enable */ -// Cause Register -#define CAUSE_BD 0x80000000 // Branch delay slot -#define CAUSE_CEMASK 0x30000000 // coprocessor error +/* Cause Register */ +#define CAUSE_BD 0x80000000 /* Branch delay slot */ +#define CAUSE_CEMASK 0x30000000 /* coprocessor error */ #define CAUSE_CESHIFT 28 -// Interrupt pending bits -#define CAUSE_IP8 0x00008000 // External level 8 pending - COMPARE -#define CAUSE_IP7 0x00004000 // External level 7 pending - INT4 -#define CAUSE_IP6 0x00002000 // External level 6 pending - INT3 -#define CAUSE_IP5 0x00001000 // External level 5 pending - INT2 -#define CAUSE_IP4 0x00000800 // External level 4 pending - INT1 -#define CAUSE_IP3 0x00000400 // External level 3 pending - INT0 -#define CAUSE_SW2 0x00000200 // Software level 2 pending -#define CAUSE_SW1 0x00000100 // Software level 1 pending +/* Interrupt pending bits */ +#define CAUSE_IP8 0x00008000 /* External level 8 pending - COMPARE */ +#define CAUSE_IP7 0x00004000 /* External level 7 pending - INT4 */ +#define CAUSE_IP6 0x00002000 /* External level 6 pending - INT3 */ +#define CAUSE_IP5 0x00001000 /* External level 5 pending - INT2 */ +#define CAUSE_IP4 0x00000800 /* External level 4 pending - INT1 */ +#define CAUSE_IP3 0x00000400 /* External level 3 pending - INT0 */ +#define CAUSE_SW2 0x00000200 /* Software level 2 pending */ +#define CAUSE_SW1 0x00000100 /* Software level 1 pending */ -#define CAUSE_IPMASK 0x0000FF00 // Pending interrupt mask +#define CAUSE_IPMASK 0x0000FF00 /* Pending interrupt mask */ #define CAUSE_IPSHIFT 8 -#define CAUSE_EXCMASK 0x0000007C // Cause code bits +#define CAUSE_EXCMASK 0x0000007C /* Cause code bits */ #define CAUSE_EXCSHIFT 2 -// Cause register exception codes +/* Cause register exception codes */ #define EXC_CODE(x) ((x) << 2) -// Hardware exception codes -#define EXC_INT EXC_CODE(0) // interrupt -#define EXC_MOD EXC_CODE(1) // TLB mod -#define EXC_RMISS EXC_CODE(2) // Read TLB Miss -#define EXC_WMISS EXC_CODE(3) // Write TLB Miss -#define 
EXC_RADE EXC_CODE(4) // Read Address Error -#define EXC_WADE EXC_CODE(5) // Write Address Error -#define EXC_IBE EXC_CODE(6) // Instruction Bus Error -#define EXC_DBE EXC_CODE(7) // Data Bus Error -#define EXC_SYSCALL EXC_CODE(8) // SYSCALL -#define EXC_BREAK EXC_CODE(9) // BREAKpoint -#define EXC_II EXC_CODE(10) // Illegal Instruction -#define EXC_CPU EXC_CODE(11) // CoProcessor Unusable -#define EXC_OV EXC_CODE(12) // OVerflow -#define EXC_TRAP EXC_CODE(13) // Trap exception -#define EXC_VCEI EXC_CODE(14) // Virt. Coherency on Inst. fetch -#define EXC_FPE EXC_CODE(15) // Floating Point Exception -#define EXC_WATCH EXC_CODE(23) // Watchpoint reference -#define EXC_VCED EXC_CODE(31) // Virt. Coherency on data read +/* Hardware exception codes */ +#define EXC_INT EXC_CODE(0) /* interrupt */ +#define EXC_MOD EXC_CODE(1) /* TLB mod */ +#define EXC_RMISS EXC_CODE(2) /* Read TLB Miss */ +#define EXC_WMISS EXC_CODE(3) /* Write TLB Miss */ +#define EXC_RADE EXC_CODE(4) /* Read Address Error */ +#define EXC_WADE EXC_CODE(5) /* Write Address Error */ +#define EXC_IBE EXC_CODE(6) /* Instruction Bus Error */ +#define EXC_DBE EXC_CODE(7) /* Data Bus Error */ +#define EXC_SYSCALL EXC_CODE(8) /* SYSCALL */ +#define EXC_BREAK EXC_CODE(9) /* BREAKpoint */ +#define EXC_II EXC_CODE(10) /* Illegal Instruction */ +#define EXC_CPU EXC_CODE(11) /* CoProcessor Unusable */ +#define EXC_OV EXC_CODE(12) /* OVerflow */ +#define EXC_TRAP EXC_CODE(13) /* Trap exception */ +#define EXC_VCEI EXC_CODE(14) /* Virt. Coherency on Inst. fetch */ +#define EXC_FPE EXC_CODE(15) /* Floating Point Exception */ +#define EXC_WATCH EXC_CODE(23) /* Watchpoint reference */ +#define EXC_VCED EXC_CODE(31) /* Virt. 
Coherency on data read */ -// C0_PRID Defines +/* C0_PRID Defines */ #define C0_IMPMASK 0xFF00 #define C0_IMPSHIFT 8 #define C0_REVMASK 0xFF @@ -193,106 +193,106 @@ #define C0_MAJREVSHIFT 4 #define C0_MINREVMASK 0xF -// Coprocessor 0 operations -#define C0_READI 0x1 // read ITLB entry addressed by C0_INDEX -#define C0_WRITEI 0x2 // write ITLB entry addressed by C0_INDEX -#define C0_WRITER 0x6 // write ITLB entry addressed by C0_RAND -#define C0_PROBE 0x8 // probe for ITLB entry addressed by TLBHI -#define C0_RFE 0x10 // restore for exception +/* Coprocessor 0 operations */ +#define C0_READI 0x1 /* read ITLB entry addressed by C0_INDEX */ +#define C0_WRITEI 0x2 /* write ITLB entry addressed by C0_INDEX */ +#define C0_WRITER 0x6 /* write ITLB entry addressed by C0_RAND */ +#define C0_PROBE 0x8 /* probe for ITLB entry addressed by TLBHI */ +#define C0_RFE 0x10 /* restore for exception */ -// 'cache' instruction definitions +/* 'cache' instruction definitions */ -// Target cache -#define CACH_PI 0x0 // specifies primary inst. cache -#define CACH_PD 0x1 // primary data cache -#define CACH_SI 0x2 // secondary instruction cache -#define CACH_SD 0x3 // secondary data cache +/* Target cache */ +#define CACH_PI 0x0 /* specifies primary inst. cache */ +#define CACH_PD 0x1 /* primary data cache */ +#define CACH_SI 0x2 /* secondary instruction cache */ +#define CACH_SD 0x3 /* secondary data cache */ -// Cache operations -#define C_IINV 0x0 // index invalidate (inst, 2nd inst) -#define C_IWBINV 0x0 // index writeback inval (d, sd) -#define C_ILT 0x4 // index load tag (all) -#define C_IST 0x8 // index store tag (all) -#define C_CDX 0xC // create dirty exclusive (d, sd) -#define C_HINV 0x10 // hit invalidate (all) -#define C_HWBINV 0x14 // hit writeback inv. (d, sd) -#define C_FILL 0x14 // fill (i) -#define C_HWB 0x18 // hit writeback (i, d, sd) -#define C_HSV 0x1C // hit set virt. 
(si, sd) +/* Cache operations */ +#define C_IINV 0x0 /* index invalidate (inst, 2nd inst) */ +#define C_IWBINV 0x0 /* index writeback inval (d, sd) */ +#define C_ILT 0x4 /* index load tag (all) */ +#define C_IST 0x8 /* index store tag (all) */ +#define C_CDX 0xC /* create dirty exclusive (d, sd) */ +#define C_HINV 0x10 /* hit invalidate (all) */ +#define C_HWBINV 0x14 /* hit writeback inv. (d, sd) */ +#define C_FILL 0x14 /* fill (i) */ +#define C_HWB 0x18 /* hit writeback (i, d, sd) */ +#define C_HSV 0x1C /* hit set virt. (si, sd) */ -// Cache size definitions -#define ICACHE_SIZE 0x4000 // 16K -#define ICACHE_LINESIZE 32 // 8 words +/* Cache size definitions */ +#define ICACHE_SIZE 0x4000 /* 16K */ +#define ICACHE_LINESIZE 32 /* 8 words */ #define ICACHE_LINEMASK (ICACHE_LINESIZE - 1) -#define DCACHE_SIZE 0x2000 // 8K -#define DCACHE_LINESIZE 16 // 4 words +#define DCACHE_SIZE 0x2000 /* 8K */ +#define DCACHE_LINESIZE 16 /* 4 words */ #define DCACHE_LINEMASK (DCACHE_LINESIZE - 1) -// C0_CONFIG register definitions -#define CONFIG_CM 0x80000000 // 1 == Master-Checker enabled -#define CONFIG_EC 0x70000000 // System Clock ratio -#define CONFIG_EC_1_1 0x6 // System Clock ratio 1 :1 -#define CONFIG_EC_3_2 0x7 // System Clock ratio 1.5 :1 -#define CONFIG_EC_2_1 0x0 // System Clock ratio 2 :1 -#define CONFIG_EC_3_1 0x1 // System Clock ratio 3 :1 -#define CONFIG_EP 0x0F000000 // Transmit Data Pattern -#define CONFIG_SB 0x00C00000 // Secondary cache block size +/* C0_CONFIG register definitions */ +#define CONFIG_CM 0x80000000 /* 1 == Master-Checker enabled */ +#define CONFIG_EC 0x70000000 /* System Clock ratio */ +#define CONFIG_EC_1_1 0x6 /* System Clock ratio 1 :1 */ +#define CONFIG_EC_3_2 0x7 /* System Clock ratio 1.5 :1 */ +#define CONFIG_EC_2_1 0x0 /* System Clock ratio 2 :1 */ +#define CONFIG_EC_3_1 0x1 /* System Clock ratio 3 :1 */ +#define CONFIG_EP 0x0F000000 /* Transmit Data Pattern */ +#define CONFIG_SB 0x00C00000 /* Secondary cache block size */ -#define 
CONFIG_SS 0x00200000 // Split scache: 0 == I&D combined -#define CONFIG_SW 0x00100000 // scache port: 0==128, 1==64 -#define CONFIG_EW 0x000C0000 // System Port width: 0==64, 1==32 -#define CONFIG_SC 0x00020000 // 0 -> 2nd cache present -#define CONFIG_SM 0x00010000 // 0 -> Dirty Shared Coherency enable -#define CONFIG_BE 0x00008000 // Endian-ness: 1 --> BE -#define CONFIG_EM 0x00004000 // 1 -> ECC mode, 0 -> parity -#define CONFIG_EB 0x00002000 // Block order:1->sequent,0->subblock +#define CONFIG_SS 0x00200000 /* Split scache: 0 == I&D combined */ +#define CONFIG_SW 0x00100000 /* scache port: 0==128, 1==64 */ +#define CONFIG_EW 0x000C0000 /* System Port width: 0==64, 1==32 */ +#define CONFIG_SC 0x00020000 /* 0 -> 2nd cache present */ +#define CONFIG_SM 0x00010000 /* 0 -> Dirty Shared Coherency enable */ +#define CONFIG_BE 0x00008000 /* Endian-ness: 1 --> BE */ +#define CONFIG_EM 0x00004000 /* 1 -> ECC mode, 0 -> parity */ +#define CONFIG_EB 0x00002000 /* Block order:1->sequent,0->subblock */ -#define CONFIG_IC 0x00000E00 // Primary Icache size -#define CONFIG_DC 0x000001C0 // Primary Dcache size -#define CONFIG_IB 0x00000020 // Icache block size -#define CONFIG_DB 0x00000010 // Dcache block size -#define CONFIG_CU 0x00000008 // Update on Store-conditional -#define CONFIG_K0 0x00000007 // K0SEG Coherency algorithm +#define CONFIG_IC 0x00000E00 /* Primary Icache size */ +#define CONFIG_DC 0x000001C0 /* Primary Dcache size */ +#define CONFIG_IB 0x00000020 /* Icache block size */ +#define CONFIG_DB 0x00000010 /* Dcache block size */ +#define CONFIG_CU 0x00000008 /* Update on Store-conditional */ +#define CONFIG_K0 0x00000007 /* K0SEG Coherency algorithm */ -#define CONFIG_UNCACHED 0x00000002 // K0 is uncached +#define CONFIG_UNCACHED 0x00000002 /* K0 is uncached */ #define CONFIG_NONCOHRNT 0x00000003 #define CONFIG_COHRNT_EXLWR 0x00000005 -#define CONFIG_SB_SHFT 22 // shift SB to bit position 0 -#define CONFIG_IC_SHFT 9 // shift IC to bit position 0 -#define 
CONFIG_DC_SHFT 6 // shift DC to bit position 0 -#define CONFIG_BE_SHFT 15 // shift BE to bit position 0 +#define CONFIG_SB_SHFT 22 /* shift SB to bit position 0 */ +#define CONFIG_IC_SHFT 9 /* shift IC to bit position 0 */ +#define CONFIG_DC_SHFT 6 /* shift DC to bit position 0 */ +#define CONFIG_BE_SHFT 15 /* shift BE to bit position 0 */ -// C0_TAGLO definitions for setting/getting cache states and physaddr bits -#define SADDRMASK 0xFFFFE000 // 31..13 -> scache paddr bits 35..17 -#define SVINDEXMASK 0x00000380 // 9..7: prim virt index bits 14..12 -#define SSTATEMASK 0x00001C00 // bits 12..10 hold scache line state -#define SINVALID 0x00000000 // invalid --> 000 == state 0 -#define SCLEANEXCL 0x00001000 // clean exclusive --> 100 == state 4 -#define SDIRTYEXCL 0x00001400 // dirty exclusive --> 101 == state 5 -#define SECC_MASK 0x0000007F // low 7 bits are ecc for the tag -#define SADDR_SHIFT 4 // shift STagLo (31..13) to 35..17 +/* C0_TAGLO definitions for setting/getting cache states and physaddr bits */ +#define SADDRMASK 0xFFFFE000 /* 31..13 -> scache paddr bits 35..17 */ +#define SVINDEXMASK 0x00000380 /* 9..7: prim virt index bits 14..12 */ +#define SSTATEMASK 0x00001C00 /* bits 12..10 hold scache line state */ +#define SINVALID 0x00000000 /* invalid --> 000 == state 0 */ +#define SCLEANEXCL 0x00001000 /* clean exclusive --> 100 == state 4 */ +#define SDIRTYEXCL 0x00001400 /* dirty exclusive --> 101 == state 5 */ +#define SECC_MASK 0x0000007F /* low 7 bits are ecc for the tag */ +#define SADDR_SHIFT 4 /* shift STagLo (31..13) to 35..17 */ -#define PADDRMASK 0xFFFFFF00 // PTagLo31..8->prim paddr bits35..12 -#define PADDR_SHIFT 4 // roll bits 35..12 down to 31..8 -#define PSTATEMASK 0x00C0 // bits 7..6 hold primary line state -#define PINVALID 0x0000 // invalid --> 000 == state 0 -#define PCLEANEXCL 0x0080 // clean exclusive --> 10 == state 2 -#define PDIRTYEXCL 0x00C0 // dirty exclusive --> 11 == state 3 -#define PPARITY_MASK 0x0001 // low bit is parity bit 
(even). +#define PADDRMASK 0xFFFFFF00 /* PTagLo31..8->prim paddr bits35..12 */ +#define PADDR_SHIFT 4 /* roll bits 35..12 down to 31..8 */ +#define PSTATEMASK 0x00C0 /* bits 7..6 hold primary line state */ +#define PINVALID 0x0000 /* invalid --> 000 == state 0 */ +#define PCLEANEXCL 0x0080 /* clean exclusive --> 10 == state 2 */ +#define PDIRTYEXCL 0x00C0 /* dirty exclusive --> 11 == state 3 */ +#define PPARITY_MASK 0x0001 /* low bit is parity bit (even). */ -// C0_CACHE_ERR definitions. -#define CACHERR_ER 0x80000000 // 0: inst ref, 1: data ref -#define CACHERR_EC 0x40000000 // 0: primary, 1: secondary -#define CACHERR_ED 0x20000000 // 1: data error -#define CACHERR_ET 0x10000000 // 1: tag error -#define CACHERR_ES 0x08000000 // 1: external ref, e.g. snoo -#define CACHERR_EE 0x04000000 // error on SysAD bus -#define CACHERR_EB 0x02000000 // complicated, see spec. -#define CACHERR_EI 0x01000000 // complicated, see spec. -#define CACHERR_SIDX_MASK 0x003FFFF8 // secondary cache index -#define CACHERR_PIDX_MASK 0x00000007 // primary cache index -#define CACHERR_PIDX_SHIFT 12 // bits 2..0 are paddr14..12 +/* C0_CACHE_ERR definitions. */ +#define CACHERR_ER 0x80000000 /* 0: inst ref, 1: data ref */ +#define CACHERR_EC 0x40000000 /* 0: primary, 1: secondary */ +#define CACHERR_ED 0x20000000 /* 1: data error */ +#define CACHERR_ET 0x10000000 /* 1: tag error */ +#define CACHERR_ES 0x08000000 /* 1: external ref, e.g. snoo */ +#define CACHERR_EE 0x04000000 /* error on SysAD bus */ +#define CACHERR_EB 0x02000000 /* complicated, see spec. */ +#define CACHERR_EI 0x01000000 /* complicated, see spec. 
*/ +#define CACHERR_SIDX_MASK 0x003FFFF8 /* secondary cache index */ +#define CACHERR_PIDX_MASK 0x00000007 /* primary cache index */ +#define CACHERR_PIDX_SHIFT 12 /* bits 2..0 are paddr14..12 */ /* * R4000 family supports hardware watchpoints: @@ -311,57 +311,57 @@ #define WATCHLO_VALIDMASK 0xFFFFFFFB #define WATCHHI_VALIDMASK 0x0000000F -// Coprocessor 0 registers +/* Coprocessor 0 registers */ #define C0_INX C_REG(0) #define C0_RAND C_REG(1) #define C0_ENTRYLO0 C_REG(2) #define C0_ENTRYLO1 C_REG(3) #define C0_CONTEXT C_REG(4) -#define C0_PAGEMASK C_REG(5) // page mask -#define C0_WIRED C_REG(6) // # wired entries in tlb +#define C0_PAGEMASK C_REG(5) /* page mask */ +#define C0_WIRED C_REG(6) /* # wired entries in tlb */ #define C0_BADVADDR C_REG(8) -#define C0_COUNT C_REG(9) // free-running counter +#define C0_COUNT C_REG(9) /* free-running counter */ #define C0_ENTRYHI C_REG(10) -#define C0_COMPARE C_REG(11) // counter comparison reg. +#define C0_COMPARE C_REG(11) /* counter comparison reg. */ #define C0_SR C_REG(12) #define C0_CAUSE C_REG(13) #define C0_EPC C_REG(14) -#define C0_PRID C_REG(15) // revision identifier -#define C0_CONFIG C_REG(16) // hardware configuration -#define C0_LLADDR C_REG(17) // load linked address -#define C0_WATCHLO C_REG(18) // watchpoint -#define C0_WATCHHI C_REG(19) // watchpoint -#define C0_ECC C_REG(26) // S-cache ECC and primary parity -#define C0_CACHE_ERR C_REG(27) // cache error status -#define C0_TAGLO C_REG(28) // cache operations -#define C0_TAGHI C_REG(29) // cache operations -#define C0_ERROR_EPC C_REG(30) // ECC error prg. 
counter +#define C0_PRID C_REG(15) /* revision identifier */ +#define C0_CONFIG C_REG(16) /* hardware configuration */ +#define C0_LLADDR C_REG(17) /* load linked address */ +#define C0_WATCHLO C_REG(18) /* watchpoint */ +#define C0_WATCHHI C_REG(19) /* watchpoint */ +#define C0_ECC C_REG(26) /* S-cache ECC and primary parity */ +#define C0_CACHE_ERR C_REG(27) /* cache error status */ +#define C0_TAGLO C_REG(28) /* cache operations */ +#define C0_TAGHI C_REG(29) /* cache operations */ +#define C0_ERROR_EPC C_REG(30) /* ECC error prg. counter */ -// floating-point status register +/* floating-point status register */ #define C1_FPCSR C_REG(31) -#define FPCSR_FS 0x01000000 // flush denorm to zero -#define FPCSR_C 0x00800000 // condition bit -#define FPCSR_CE 0x00020000 // cause: unimplemented operation -#define FPCSR_CV 0x00010000 // cause: invalid operation -#define FPCSR_CZ 0x00008000 // cause: division by zero -#define FPCSR_CO 0x00004000 // cause: overflow -#define FPCSR_CU 0x00002000 // cause: underflow -#define FPCSR_CI 0x00001000 // cause: inexact operation -#define FPCSR_EV 0x00000800 // enable: invalid operation -#define FPCSR_EZ 0x00000400 // enable: division by zero -#define FPCSR_EO 0x00000200 // enable: overflow -#define FPCSR_EU 0x00000100 // enable: underflow -#define FPCSR_EI 0x00000080 // enable: inexact operation -#define FPCSR_FV 0x00000040 // flag: invalid operation -#define FPCSR_FZ 0x00000020 // flag: division by zero -#define FPCSR_FO 0x00000010 // flag: overflow -#define FPCSR_FU 0x00000008 // flag: underflow -#define FPCSR_FI 0x00000004 // flag: inexact operation -#define FPCSR_RM_MASK 0x00000003 // rounding mode mask -#define FPCSR_RM_RN 0x00000000 // round to nearest -#define FPCSR_RM_RZ 0x00000001 // round to zero -#define FPCSR_RM_RP 0x00000002 // round to positive infinity -#define FPCSR_RM_RM 0x00000003 // round to negative infinity +#define FPCSR_FS 0x01000000 /* flush denorm to zero */ +#define FPCSR_C 0x00800000 /* condition bit */ 
+#define FPCSR_CE 0x00020000 /* cause: unimplemented operation */ +#define FPCSR_CV 0x00010000 /* cause: invalid operation */ +#define FPCSR_CZ 0x00008000 /* cause: division by zero */ +#define FPCSR_CO 0x00004000 /* cause: overflow */ +#define FPCSR_CU 0x00002000 /* cause: underflow */ +#define FPCSR_CI 0x00001000 /* cause: inexact operation */ +#define FPCSR_EV 0x00000800 /* enable: invalid operation */ +#define FPCSR_EZ 0x00000400 /* enable: division by zero */ +#define FPCSR_EO 0x00000200 /* enable: overflow */ +#define FPCSR_EU 0x00000100 /* enable: underflow */ +#define FPCSR_EI 0x00000080 /* enable: inexact operation */ +#define FPCSR_FV 0x00000040 /* flag: invalid operation */ +#define FPCSR_FZ 0x00000020 /* flag: division by zero */ +#define FPCSR_FO 0x00000010 /* flag: overflow */ +#define FPCSR_FU 0x00000008 /* flag: underflow */ +#define FPCSR_FI 0x00000004 /* flag: inexact operation */ +#define FPCSR_RM_MASK 0x00000003 /* rounding mode mask */ +#define FPCSR_RM_RN 0x00000000 /* round to nearest */ +#define FPCSR_RM_RZ 0x00000001 /* round to zero */ +#define FPCSR_RM_RP 0x00000002 /* round to positive infinity */ +#define FPCSR_RM_RM 0x00000003 /* round to negative infinity */ #endif diff --git a/include/ultra64/asm.h b/include/ultra64/asm.h index 2a2536bdaa..acf6b8f0a7 100644 --- a/include/ultra64/asm.h +++ b/include/ultra64/asm.h @@ -6,55 +6,104 @@ #define _MIPS_ISA_MIPS2 2 #define _MIPS_ISA_MIPS3 3 #define _MIPS_ISA_MIPS4 4 + +#define _MIPS_SIM_ABI32 1 /* MIPS MSIG calling convention */ +#define _MIPS_SIM_NABI32 2 /* MIPS new 32-bit abi */ + /* NABI32 is 64bit calling convention but 32bit type sizes) */ +#define _MIPS_SIM_ABI64 3 /* MIPS 64 calling convention */ #endif #ifndef _LANGUAGE_C +#ifdef __GNUC__ +#define TYPE(x, t) .type x, @t +#define SIZE(x) .size x, . 
- x +#else +#define TYPE(x, t) +#define SIZE(x) +#endif + #define LEAF(x) \ - .balign 4 ;\ + .align 2 ;\ .globl x ;\ - .type x, @function ;\ + TYPE(x, function) ;\ + .ent x, 0 ;\ x: ;\ - .ent x, 0 ;\ - .frame $sp, 0, $ra + .frame sp, 0, ra #define XLEAF(x) \ - .balign 4 ;\ + .align 2 ;\ .globl x ;\ - .type x, @function ;\ - x: ;\ - .aent x, 0 + TYPE(x, function) ;\ + .aent x, 0 ;\ + x: #define NESTED(x, fsize, ra) \ .globl x ;\ + .ent x, 0 ;\ x: ;\ - .ent x, 0 ;\ - .frame $sp, fsize, ra + .frame sp, fsize, ra #define XNESTED(x) \ .globl x ;\ - x: ;\ - .aent x, 0 + .aent x, 0 ;\ + x: #define END(x) \ - .size x, . - x ;\ + SIZE(x) ;\ .end x -#define IMPORT(x, size) \ - .extern x, size - #define EXPORT(x) \ .globl x ;\ x: +#ifdef __sgi +#define IMPORT(sym, size) \ + .extern sym, size +#else +#define IMPORT(sym, size) +#endif + #define DATA(x) \ - .balign 4 ;\ + .align 2 ;\ .globl x ;\ - .type x, @object ;\ + TYPE(x, object) ;\ x: #define ENDDATA(x) \ - .size x, . - x + SIZE(x) + +#define MFC0(dst, src) \ + .set noreorder; mfc0 dst, src; .set reorder +#define MTC0(dst, src) \ + .set noreorder; mtc0 dst, src; .set reorder + +#define CFC1(dst, src) \ + .set noreorder; cfc1 dst, src; .set reorder +#define CTC1(src, dst) \ + .set noreorder; ctc1 src, dst; .set reorder + +#define NOP \ + .set noreorder; nop; .set reorder + +#define TLBWI \ + .set noreorder; tlbwi; .set reorder + +#define TLBR \ + .set noreorder; tlbr; .set reorder + +#define TLBP \ + .set noreorder; tlbp; .set reorder + +#ifdef __sgi +#define ABS(x, y) \ + .globl x; \ + x = y +#else +#define ABS(x, y) \ + .globl x; \ + .set x, y +#endif #endif @@ -62,11 +111,11 @@ * Stack Alignment */ #if (_MIPS_SIM == _ABIO32) -#define NARGSAVE 4 // space for 4 args must be allocated +#define NARGSAVE 4 /* space for 4 args must be allocated */ #define ALSZ (8-1) #define ALMASK ~(8-1) #elif (_MIPS_SIM == _ABIN32 || _MIPS_SIM == _ABI64) -#define NARGSAVE 0 // no caller responsibilities +#define NARGSAVE 0 /* no caller 
responsibilities */ #define ALSZ (16-1) #define ALMASK ~(16-1) #endif diff --git a/include/ultra64/exception.h b/include/ultra64/exception.h index 886a3b9096..d4368da166 100644 --- a/include/ultra64/exception.h +++ b/include/ultra64/exception.h @@ -1,7 +1,7 @@ #ifndef ULTRA64_EXCEPTION_H #define ULTRA64_EXCEPTION_H -// Interrupt masks +/* Interrupt masks */ #define OS_IM_NONE 0x00000001 #define OS_IM_RCP 0x00000401 #define OS_IM_SW1 0x00000501 @@ -23,7 +23,7 @@ #define RCP_IMASK 0x003F0000 #define RCP_IMASKSHIFT 16 -// OSHWIntr values +/* OSHWIntr values */ #define OS_INTR_CART 1 #ifdef _LANGUAGE_C @@ -51,11 +51,11 @@ extern __osHwInt __osHwIntTable[]; #else -// __osHwInt struct member offsets +/* __osHwInt struct member offsets */ #define HWINT_CALLBACK 0x00 #define HWINT_SP 0x04 -// __osHwInt struct size +/* __osHwInt struct size */ #define HWINT_SIZE 0x8 #endif diff --git a/include/ultra64/message.h b/include/ultra64/message.h index bf93c524c9..055c2e3249 100644 --- a/include/ultra64/message.h +++ b/include/ultra64/message.h @@ -47,7 +47,7 @@ typedef struct OSMesgQueue { #else -// OSMesgQueue struct member offsets +/* OSMesgQueue struct member offsets */ #define MQ_MTQUEUE 0x00 #define MQ_FULLQUEUE 0x04 diff --git a/include/ultra64/rcp.h b/include/ultra64/rcp.h index 9c97e8bd9f..8fb8a523e9 100644 --- a/include/ultra64/rcp.h +++ b/include/ultra64/rcp.h @@ -114,13 +114,13 @@ /** * External device info */ -#define DEVICE_TYPE_CART 0 // ROM cartridge -#define DEVICE_TYPE_BULK 1 // ROM bulk -#define DEVICE_TYPE_64DD 2 // 64 Disk Drive -#define DEVICE_TYPE_SRAM 3 // SRAM -// 4-6 are reserved -#define DEVICE_TYPE_INIT 7 // initial value -// 8-14 are reserved +#define DEVICE_TYPE_CART 0 /* ROM cartridge */ +#define DEVICE_TYPE_BULK 1 /* ROM bulk */ +#define DEVICE_TYPE_64DD 2 /* 64 Disk Drive */ +#define DEVICE_TYPE_SRAM 3 /* SRAM */ +/* 4-6 are reserved */ +#define DEVICE_TYPE_INIT 7 /* initial value */ +/* 8-14 are reserved */ /** @@ -138,31 +138,31 @@ #define 
SP_BASE_REG 0x04040000 -// SP memory address (R/W): [12] 0=DMEM,1=IMEM, [11:0] DMEM/IMEM address +/* SP memory address (R/W): [12] 0=DMEM,1=IMEM, [11:0] DMEM/IMEM address */ #define SP_MEM_ADDR_REG (SP_BASE_REG + 0x00) -// SP DRAM DMA address (R/W): [23:0] RDRAM address +/* SP DRAM DMA address (R/W): [23:0] RDRAM address */ #define SP_DRAM_ADDR_REG (SP_BASE_REG + 0x04) -// SP read DMA length (R/W): [31:20] skip, [19:12] count, [11:0] length; RDRAM -> I/DMEM +/* SP read DMA length (R/W): [31:20] skip, [19:12] count, [11:0] length; RDRAM -> I/DMEM */ #define SP_RD_LEN_REG (SP_BASE_REG + 0x08) -// SP write DMA length (R/W): [31:20] skip, [19:12] count, [11:0] length; I/DMEM -> RDRAM +/* SP write DMA length (R/W): [31:20] skip, [19:12] count, [11:0] length; I/DMEM -> RDRAM */ #define SP_WR_LEN_REG (SP_BASE_REG + 0x0C) -// SP status (R/W): [14:0] valid bits; see below for write/read mode +/* SP status (R/W): [14:0] valid bits; see below for write/read mode */ #define SP_STATUS_REG (SP_BASE_REG + 0x10) -// SP DMA full (R): [0] dma full +/* SP DMA full (R): [0] dma full */ #define SP_DMA_FULL_REG (SP_BASE_REG + 0x14) -// SP DMA busy (R): [0] dma busy +/* SP DMA busy (R): [0] dma busy */ #define SP_DMA_BUSY_REG (SP_BASE_REG + 0x18) -// SP semaphore (R/W): Read: [0] acquire semaphore; Write: [] release semaphore +/* SP semaphore (R/W): Read: [0] acquire semaphore; Write: [] release semaphore */ #define SP_SEMAPHORE_REG (SP_BASE_REG + 0x1C) -// SP PC (R/W): [11:0] program counter +/* SP PC (R/W): [11:0] program counter */ #define SP_PC_REG 0x04080000 /* @@ -174,31 +174,31 @@ /* * SP_STATUS_REG: write bits */ -#define SP_CLR_HALT (1 << 0) // clear halt -#define SP_SET_HALT (1 << 1) // set halt -#define SP_CLR_BROKE (1 << 2) // clear broke -#define SP_CLR_INTR (1 << 3) // clear interrupt -#define SP_SET_INTR (1 << 4) // set interrupt -#define SP_CLR_SSTEP (1 << 5) // clear sstep -#define SP_SET_SSTEP (1 << 6) // set sstep -#define SP_CLR_INTR_BREAK (1 << 7) // clear interrupt 
on break -#define SP_SET_INTR_BREAK (1 << 8) // set interrupt on break -#define SP_CLR_SIG0 (1 << 9) // clear signal 0 -#define SP_SET_SIG0 (1 << 10) // set signal 0 -#define SP_CLR_SIG1 (1 << 11) // clear signal 1 -#define SP_SET_SIG1 (1 << 12) // set signal 1 -#define SP_CLR_SIG2 (1 << 13) // clear signal 2 -#define SP_SET_SIG2 (1 << 14) // set signal 2 -#define SP_CLR_SIG3 (1 << 15) // clear signal 3 -#define SP_SET_SIG3 (1 << 16) // set signal 3 -#define SP_CLR_SIG4 (1 << 17) // clear signal 4 -#define SP_SET_SIG4 (1 << 18) // set signal 4 -#define SP_CLR_SIG5 (1 << 19) // clear signal 5 -#define SP_SET_SIG5 (1 << 20) // set signal 5 -#define SP_CLR_SIG6 (1 << 21) // clear signal 6 -#define SP_SET_SIG6 (1 << 22) // set signal 6 -#define SP_CLR_SIG7 (1 << 23) // clear signal 7 -#define SP_SET_SIG7 (1 << 24) // set signal 7 +#define SP_CLR_HALT (1 << 0) /* clear halt */ +#define SP_SET_HALT (1 << 1) /* set halt */ +#define SP_CLR_BROKE (1 << 2) /* clear broke */ +#define SP_CLR_INTR (1 << 3) /* clear interrupt */ +#define SP_SET_INTR (1 << 4) /* set interrupt */ +#define SP_CLR_SSTEP (1 << 5) /* clear sstep */ +#define SP_SET_SSTEP (1 << 6) /* set sstep */ +#define SP_CLR_INTR_BREAK (1 << 7) /* clear interrupt on break */ +#define SP_SET_INTR_BREAK (1 << 8) /* set interrupt on break */ +#define SP_CLR_SIG0 (1 << 9) /* clear signal 0 */ +#define SP_SET_SIG0 (1 << 10) /* set signal 0 */ +#define SP_CLR_SIG1 (1 << 11) /* clear signal 1 */ +#define SP_SET_SIG1 (1 << 12) /* set signal 1 */ +#define SP_CLR_SIG2 (1 << 13) /* clear signal 2 */ +#define SP_SET_SIG2 (1 << 14) /* set signal 2 */ +#define SP_CLR_SIG3 (1 << 15) /* clear signal 3 */ +#define SP_SET_SIG3 (1 << 16) /* set signal 3 */ +#define SP_CLR_SIG4 (1 << 17) /* clear signal 4 */ +#define SP_SET_SIG4 (1 << 18) /* set signal 4 */ +#define SP_CLR_SIG5 (1 << 19) /* clear signal 5 */ +#define SP_SET_SIG5 (1 << 20) /* set signal 5 */ +#define SP_CLR_SIG6 (1 << 21) /* clear signal 6 */ +#define SP_SET_SIG6 (1 << 
22) /* set signal 6 */ +#define SP_CLR_SIG7 (1 << 23) /* clear signal 7 */ +#define SP_SET_SIG7 (1 << 24) /* set signal 7 */ /* * SP_STATUS_REG: read bits @@ -238,22 +238,22 @@ #define SP_SET_CPUSIGNAL SP_SET_SIG4 #define SP_STATUS_CPUSIGNAL SP_STATUS_SIG4 -// SP IMEM BIST REG (R/W): [6:0] BIST status bits; see below for detail +/* SP IMEM BIST REG (R/W): [6:0] BIST status bits; see below for detail */ #define SP_IBIST_REG 0x04080004 /* * SP_IBIST_REG: write bits */ -#define SP_IBIST_CHECK (1 << 0) // BIST check -#define SP_IBIST_GO (1 << 1) // BIST go -#define SP_IBIST_CLEAR (1 << 2) // BIST clear +#define SP_IBIST_CHECK (1 << 0) /* BIST check */ +#define SP_IBIST_GO (1 << 1) /* BIST go */ +#define SP_IBIST_CLEAR (1 << 2) /* BIST clear */ /* * SP_BIST_REG: read bits * First 2 bits are same as in write mode */ #define SP_IBIST_DONE (1 << 2) -#define SP_IBIST_FAILED 0x78 // bits [6:3], BIST fail +#define SP_IBIST_FAILED 0x78 /* bits [6:3], BIST fail */ /** @@ -261,28 +261,28 @@ */ #define DPC_BASE_REG 0x04100000 -// DP CMD DMA start (R/W): [23:0] DMEM/RDRAM start address +/* DP CMD DMA start (R/W): [23:0] DMEM/RDRAM start address */ #define DPC_START_REG (DPC_BASE_REG + 0x00) -// DP CMD DMA end (R/W): [23:0] DMEM/RDRAM end address +/* DP CMD DMA end (R/W): [23:0] DMEM/RDRAM end address */ #define DPC_END_REG (DPC_BASE_REG + 0x04) -// DP CMD DMA end (R): [23:0] DMEM/RDRAM current address +/* DP CMD DMA end (R): [23:0] DMEM/RDRAM current address */ #define DPC_CURRENT_REG (DPC_BASE_REG + 0x08) -// DP CMD status (R/W): [9:0] valid bits - see below for definitions +/* DP CMD status (R/W): [9:0] valid bits - see below for definitions */ #define DPC_STATUS_REG (DPC_BASE_REG + 0x0C) -// DP clock counter (R): [23:0] clock counter +/* DP clock counter (R): [23:0] clock counter */ #define DPC_CLOCK_REG (DPC_BASE_REG + 0x10) -// DP buffer busy counter (R): [23:0] clock counter +/* DP buffer busy counter (R): [23:0] clock counter */ #define DPC_BUFBUSY_REG (DPC_BASE_REG + 0x14) 
-// DP pipe busy counter (R): [23:0] clock counter +/* DP pipe busy counter (R): [23:0] clock counter */ #define DPC_PIPEBUSY_REG (DPC_BASE_REG + 0x18) -// DP TMEM load counter (R): [23:0] clock counter +/* DP TMEM load counter (R): [23:0] clock counter */ #define DPC_TMEM_REG (DPC_BASE_REG + 0x1C) /* @@ -320,16 +320,16 @@ */ #define DPS_BASE_REG 0x04200000 -// DP tmem built-in self-test (R/W): [10:0] BIST status bits +/* DP tmem built-in self-test (R/W): [10:0] BIST status bits */ #define DPS_TBIST_REG (DPS_BASE_REG + 0x00) -// DP span test mode (R/W): [0] Span buffer test access enable +/* DP span test mode (R/W): [0] Span buffer test access enable */ #define DPS_TEST_MODE_REG (DPS_BASE_REG + 0x04) -// DP span buffer test address (R/W): [6:0] bits +/* DP span buffer test address (R/W): [6:0] bits */ #define DPS_BUFTEST_ADDR_REG (DPS_BASE_REG + 0x08) -// DP span buffer test data (R/W): [31:0] span buffer data +/* DP span buffer test data (R/W): [31:0] span buffer data */ #define DPS_BUFTEST_DATA_REG (DPS_BASE_REG + 0x0C) /* @@ -344,7 +344,7 @@ * First 2 bits are same as in write mode */ #define DPS_TBIST_DONE (1 << 2) -#define DPS_TBIST_FAILED 0x7F8 // bits [10:3], BIST fail +#define DPS_TBIST_FAILED 0x7F8 /* bits [10:3], BIST fail */ /** @@ -352,22 +352,22 @@ */ #define MI_BASE_REG 0x04300000 -// MI init mode (W): [11] clear DP interrupt, [9/10] clear/set ebus test mode -// [8] set init mode, [7] clear init mode, [6:0] init length -// (R): [8] ebus test mode, [7] init mode, [6:0] init length +/* MI init mode (W): [11] clear DP interrupt, [9/10] clear/set ebus test mode */ +/* [8] set init mode, [7] clear init mode, [6:0] init length */ +/* (R): [8] ebus test mode, [7] init mode, [6:0] init length */ #define MI_INIT_MODE_REG (MI_BASE_REG + 0x00) #define MI_MODE_REG MI_INIT_MODE_REG /* * MI_MODE_REG: write bits */ -#define MI_CLR_INIT (1 << 7) // clear init mode -#define MI_SET_INIT (1 << 8) // set init mode -#define MI_CLR_EBUS (1 << 9) // clear ebus test -#define 
MI_SET_EBUS (1 << 10) // set ebus test mode -#define MI_CLR_DP_INTR (1 << 11) // clear dp interrupt -#define MI_CLR_RDRAM (1 << 12) // clear RDRAM reg -#define MI_SET_RDRAM (1 << 13) // set RDRAM reg mode +#define MI_CLR_INIT (1 << 7) /* clear init mode */ +#define MI_SET_INIT (1 << 8) /* set init mode */ +#define MI_CLR_EBUS (1 << 9) /* clear ebus test */ +#define MI_SET_EBUS (1 << 10) /* set ebus test mode */ +#define MI_CLR_DP_INTR (1 << 11) /* clear dp interrupt */ +#define MI_CLR_RDRAM (1 << 12) /* clear RDRAM reg */ +#define MI_SET_RDRAM (1 << 13) /* set RDRAM reg mode */ /* * MI_MODE_REG: read bits @@ -376,52 +376,52 @@ #define MI_MODE_EBUS (1 << 8) /* ebus test mode */ #define MI_MODE_RDRAM (1 << 9) /* RDRAM reg mode */ -// MI version (R): [31:24] rsp, [23:16] rdp, [15:8] rac, [7:0] io +/* MI version (R): [31:24] rsp, [23:16] rdp, [15:8] rac, [7:0] io */ #define MI_VERSION_REG (MI_BASE_REG + 0x04) #define MI_NOOP_REG MI_VERSION_REG -// MI interrupt (R): [5:0] valid bits - see below for bit patterns +/* MI interrupt (R): [5:0] valid bits - see below for bit patterns */ #define MI_INTR_REG (MI_BASE_REG + 0x08) -// MI interrupt mask (R): [5:0] valid bits - see below for bit patterns -// (W): [11:0] valid bits - see below for bit patterns +/* MI interrupt mask (R): [5:0] valid bits - see below for bit patterns */ +/* (W): [11:0] valid bits - see below for bit patterns */ #define MI_INTR_MASK_REG (MI_BASE_REG + 0x0C) /* * MI_INTR_REG: read bits */ -#define MI_INTR_SP (1 << 0) // SP intr -#define MI_INTR_SI (1 << 1) // SI intr -#define MI_INTR_AI (1 << 2) // AI intr -#define MI_INTR_VI (1 << 3) // VI intr -#define MI_INTR_PI (1 << 4) // PI intr -#define MI_INTR_DP (1 << 5) // DP intr +#define MI_INTR_SP (1 << 0) /* SP intr */ +#define MI_INTR_SI (1 << 1) /* SI intr */ +#define MI_INTR_AI (1 << 2) /* AI intr */ +#define MI_INTR_VI (1 << 3) /* VI intr */ +#define MI_INTR_PI (1 << 4) /* PI intr */ +#define MI_INTR_DP (1 << 5) /* DP intr */ /* * MI_INTR_MASK_REG: 
write bits */ -#define MI_INTR_MASK_CLR_SP (1 << 0) // clear SP mask -#define MI_INTR_MASK_SET_SP (1 << 1) // set SP mask -#define MI_INTR_MASK_CLR_SI (1 << 2) // clear SI mask -#define MI_INTR_MASK_SET_SI (1 << 3) // set SI mask -#define MI_INTR_MASK_CLR_AI (1 << 4) // clear AI mask -#define MI_INTR_MASK_SET_AI (1 << 5) // set AI mask -#define MI_INTR_MASK_CLR_VI (1 << 6) // clear VI mask -#define MI_INTR_MASK_SET_VI (1 << 7) // set VI mask -#define MI_INTR_MASK_CLR_PI (1 << 8) // clear PI mask -#define MI_INTR_MASK_SET_PI (1 << 9) // set PI mask -#define MI_INTR_MASK_CLR_DP (1 << 10) // clear DP mask -#define MI_INTR_MASK_SET_DP (1 << 11) // set DP mask +#define MI_INTR_MASK_CLR_SP (1 << 0) /* clear SP mask */ +#define MI_INTR_MASK_SET_SP (1 << 1) /* set SP mask */ +#define MI_INTR_MASK_CLR_SI (1 << 2) /* clear SI mask */ +#define MI_INTR_MASK_SET_SI (1 << 3) /* set SI mask */ +#define MI_INTR_MASK_CLR_AI (1 << 4) /* clear AI mask */ +#define MI_INTR_MASK_SET_AI (1 << 5) /* set AI mask */ +#define MI_INTR_MASK_CLR_VI (1 << 6) /* clear VI mask */ +#define MI_INTR_MASK_SET_VI (1 << 7) /* set VI mask */ +#define MI_INTR_MASK_CLR_PI (1 << 8) /* clear PI mask */ +#define MI_INTR_MASK_SET_PI (1 << 9) /* set PI mask */ +#define MI_INTR_MASK_CLR_DP (1 << 10) /* clear DP mask */ +#define MI_INTR_MASK_SET_DP (1 << 11) /* set DP mask */ /* * MI_INTR_MASK_REG: read bits */ -#define MI_INTR_MASK_SP (1 << 0) // SP intr mask -#define MI_INTR_MASK_SI (1 << 1) // SI intr mask -#define MI_INTR_MASK_AI (1 << 2) // AI intr mask -#define MI_INTR_MASK_VI (1 << 3) // VI intr mask -#define MI_INTR_MASK_PI (1 << 4) // PI intr mask -#define MI_INTR_MASK_DP (1 << 5) // DP intr mask +#define MI_INTR_MASK_SP (1 << 0) /* SP intr mask */ +#define MI_INTR_MASK_SI (1 << 1) /* SI intr mask */ +#define MI_INTR_MASK_AI (1 << 2) /* AI intr mask */ +#define MI_INTR_MASK_VI (1 << 3) /* VI intr mask */ +#define MI_INTR_MASK_PI (1 << 4) /* PI intr mask */ +#define MI_INTR_MASK_DP (1 << 5) /* DP intr 
mask */ /** @@ -454,90 +454,90 @@ #define VI_CONTROL_REG (VI_BASE_REG + 0x00) #define VI_STATUS_REG VI_CONTROL_REG -// VI origin (R/W): [23:0] frame buffer origin in bytes +/* VI origin (R/W): [23:0] frame buffer origin in bytes */ #define VI_ORIGIN_REG (VI_BASE_REG + 0x04) #define VI_DRAM_ADDR_REG VI_ORIGIN_REG -// VI width (R/W): [11:0] frame buffer line width in pixels +/* VI width (R/W): [11:0] frame buffer line width in pixels */ #define VI_WIDTH_REG (VI_BASE_REG + 0x08) #define VI_H_WIDTH_REG VI_WIDTH_REG -// VI vertical intr (R/W): [9:0] interrupt when current half-line = V_INTR +/* VI vertical intr (R/W): [9:0] interrupt when current half-line = V_INTR */ #define VI_INTR_REG (VI_BASE_REG + 0x0C) #define VI_V_INTR_REG VI_INTR_REG -// VI current vertical line (R/W): [9:0] current half line, sampled once per -// line (the lsb of V_CURRENT is constant within a field, and in interlaced -// modes gives the field number - which is constant for non-interlaced modes) -// - Any write to this register will clear interrupt line +/* VI current vertical line (R/W): [9:0] current half line, sampled once per */ +/* line (the lsb of V_CURRENT is constant within a field, and in interlaced */ +/* modes gives the field number - which is constant for non-interlaced modes) */ +/* - Any write to this register will clear interrupt line */ #define VI_CURRENT_REG (VI_BASE_REG + 0x10) #define VI_V_CURRENT_LINE_REG VI_CURRENT_REG -// VI video timing (R/W): [29:20] start of color burst in pixels from h-sync -// [19:16] vertical sync width in half lines, -// [15: 8] color burst width in pixels, -// [ 7: 0] horizontal sync width in pixels, +/* VI video timing (R/W): [29:20] start of color burst in pixels from h-sync */ +/* [19:16] vertical sync width in half lines, */ +/* [15: 8] color burst width in pixels, */ +/* [ 7: 0] horizontal sync width in pixels, */ #define VI_BURST_REG (VI_BASE_REG + 0x14) #define VI_TIMING_REG VI_BURST_REG -// VI vertical sync (R/W): [9:0] number of half-lines 
per field +/* VI vertical sync (R/W): [9:0] number of half-lines per field */ #define VI_V_SYNC_REG (VI_BASE_REG + 0x18) -// VI horizontal sync (R/W): [20:16] a 5-bit leap pattern used for PAL only (h_sync_period) -// [11: 0] total duration of a line in 1/4 pixel +/* VI horizontal sync (R/W): [20:16] a 5-bit leap pattern used for PAL only (h_sync_period) */ +/* [11: 0] total duration of a line in 1/4 pixel */ #define VI_H_SYNC_REG (VI_BASE_REG + 0x1C) -// VI horizontal sync leap (R/W): [27:16] identical to h_sync_period -// [11: 0] identical to h_sync_period +/* VI horizontal sync leap (R/W): [27:16] identical to h_sync_period */ +/* [11: 0] identical to h_sync_period */ #define VI_LEAP_REG (VI_BASE_REG + 0x20) #define VI_H_SYNC_LEAP_REG VI_LEAP_REG -// VI horizontal video (R/W): [25:16] start of active video in screen pixels -// [ 9: 0] end of active video in screen pixels +/* VI horizontal video (R/W): [25:16] start of active video in screen pixels */ +/* [ 9: 0] end of active video in screen pixels */ #define VI_H_START_REG (VI_BASE_REG + 0x24) #define VI_H_VIDEO_REG VI_H_START_REG -// VI vertical video (R/W): [25:16] start of active video in screen half-lines -// [ 9: 0] end of active video in screen half-lines +/* VI vertical video (R/W): [25:16] start of active video in screen half-lines */ +/* [ 9: 0] end of active video in screen half-lines */ #define VI_V_START_REG (VI_BASE_REG + 0x28) #define VI_V_VIDEO_REG VI_V_START_REG -// VI vertical burst (R/W): [25:16] start of color burst enable in half-lines -// [ 9: 0] end of color burst enable in half-lines +/* VI vertical burst (R/W): [25:16] start of color burst enable in half-lines */ +/* [ 9: 0] end of color burst enable in half-lines */ #define VI_V_BURST_REG (VI_BASE_REG + 0x2C) -// VI x-scale (R/W): [27:16] horizontal subpixel offset (2.10 format) -// [11: 0] 1/horizontal scale up factor (2.10 format) +/* VI x-scale (R/W): [27:16] horizontal subpixel offset (2.10 format) */ +/* [11: 0] 1/horizontal scale 
up factor (2.10 format) */ #define VI_X_SCALE_REG (VI_BASE_REG + 0x30) -// VI y-scale (R/W): [27:16] vertical subpixel offset (2.10 format) -// [11: 0] 1/vertical scale up factor (2.10 format) +/* VI y-scale (R/W): [27:16] vertical subpixel offset (2.10 format) */ +/* [11: 0] 1/vertical scale up factor (2.10 format) */ #define VI_Y_SCALE_REG (VI_BASE_REG + 0x34) /* * VI_CONTROL_REG: read bits */ -#define VI_CTRL_TYPE_16 0x00002 // [1:0] pixel size: 16 bit -#define VI_CTRL_TYPE_32 0x00003 // [1:0] pixel size: 32 bit -#define VI_CTRL_GAMMA_DITHER_ON 0x00004 // 2: default = on -#define VI_CTRL_GAMMA_ON 0x00008 // 3: default = on -#define VI_CTRL_DIVOT_ON 0x00010 // 4: default = on -#define VI_CTRL_SERRATE_ON 0x00040 // 6: on if interlaced -#define VI_CTRL_ANTIALIAS_MASK 0x00300 // [9:8] anti-alias mode -#define VI_CTRL_ANTIALIAS_MODE_0 0x00000 // Bit [9:8] anti-alias mode: AA enabled, resampling enabled, always fetch extra lines -#define VI_CTRL_ANTIALIAS_MODE_1 0x00100 // Bit [9:8] anti-alias mode: AA enabled, resampling enabled, fetch extra lines as-needed -#define VI_CTRL_ANTIALIAS_MODE_2 0x00200 // Bit [9:8] anti-alias mode: AA disabled, resampling enabled, operate as if everything is covered -#define VI_CTRL_ANTIALIAS_MODE_3 0x00300 // Bit [9:8] anti-alias mode: AA disabled, resampling disabled, replicate pixels -#define VI_CTRL_PIXEL_ADV_MASK 0x0F000 // [15:12] pixel advance mode -#define VI_CTRL_PIXEL_ADV_3 0x03000 // Bit [15:12] pixel advance mode: Always 3 on N64 -#define VI_CTRL_DITHER_FILTER_ON 0x10000 // 16: dither-filter mode +#define VI_CTRL_TYPE_16 0x00002 /* [1:0] pixel size: 16 bit */ +#define VI_CTRL_TYPE_32 0x00003 /* [1:0] pixel size: 32 bit */ +#define VI_CTRL_GAMMA_DITHER_ON 0x00004 /* 2: default = on */ +#define VI_CTRL_GAMMA_ON 0x00008 /* 3: default = on */ +#define VI_CTRL_DIVOT_ON 0x00010 /* 4: default = on */ +#define VI_CTRL_SERRATE_ON 0x00040 /* 6: on if interlaced */ +#define VI_CTRL_ANTIALIAS_MASK 0x00300 /* [9:8] anti-alias mode */ 
+#define VI_CTRL_ANTIALIAS_MODE_0 0x00000 /* Bit [9:8] anti-alias mode: AA enabled, resampling enabled, always fetch extra lines */ +#define VI_CTRL_ANTIALIAS_MODE_1 0x00100 /* Bit [9:8] anti-alias mode: AA enabled, resampling enabled, fetch extra lines as-needed */ +#define VI_CTRL_ANTIALIAS_MODE_2 0x00200 /* Bit [9:8] anti-alias mode: AA disabled, resampling enabled, operate as if everything is covered */ +#define VI_CTRL_ANTIALIAS_MODE_3 0x00300 /* Bit [9:8] anti-alias mode: AA disabled, resampling disabled, replicate pixels */ +#define VI_CTRL_PIXEL_ADV_MASK 0x0F000 /* [15:12] pixel advance mode */ +#define VI_CTRL_PIXEL_ADV(n) (((n) << 12) & VI_CTRL_PIXEL_ADV_MASK) /* Bit [15:12] pixel advance mode: Always 3 on N64 */ +#define VI_CTRL_DITHER_FILTER_ON 0x10000 /* 16: dither-filter mode */ /* * Possible video clocks (NTSC or PAL) */ -#define VI_NTSC_CLOCK 48681812 // Hz = 48.681812 MHz -#define VI_PAL_CLOCK 49656530 // Hz = 49.656530 MHz -#define VI_MPAL_CLOCK 48628316 // Hz = 48.628316 MHz +#define VI_NTSC_CLOCK 48681812 /* Hz = 48.681812 MHz */ +#define VI_PAL_CLOCK 49656530 /* Hz = 49.656530 MHz */ +#define VI_MPAL_CLOCK 48628316 /* Hz = 48.628316 MHz */ /** @@ -549,25 +549,25 @@ */ #define AI_BASE_REG 0x04500000 -// AI DRAM address (W): [23:0] starting RDRAM address (8B-aligned) +/* AI DRAM address (W): [23:0] starting RDRAM address (8B-aligned) */ #define AI_DRAM_ADDR_REG (AI_BASE_REG + 0x00) -// AI length (R/W): [14:0] transfer length (v1.0) - Bottom 3 bits are ignored -// [17:0] transfer length (v2.0) - Bottom 3 bits are ignored +/* AI length (R/W): [14:0] transfer length (v1.0) - Bottom 3 bits are ignored */ +/* [17:0] transfer length (v2.0) - Bottom 3 bits are ignored */ #define AI_LEN_REG (AI_BASE_REG + 0x04) -// AI control (W): [0] DMA enable - if LSB == 1, DMA is enabled +/* AI control (W): [0] DMA enable - if LSB == 1, DMA is enabled */ #define AI_CONTROL_REG (AI_BASE_REG + 0x08) /* * AI_CONTROL_REG: write bits */ -#define AI_CONTROL_DMA_ON 1 // LSB 
= 1: DMA enable -#define AI_CONTROL_DMA_OFF 0 // LSB = 1: DMA enable +#define AI_CONTROL_DMA_ON 1 /* LSB = 1: DMA enable */ +#define AI_CONTROL_DMA_OFF 0 /* LSB = 1: DMA enable */ -// AI status (R): [31]/[0] ai_full (addr & len buffer full), [30] ai_busy -// Note that a 1->0 transition in ai_full will set interrupt -// (W): clear audio interrupt +/* AI status (R): [31]/[0] ai_full (addr & len buffer full), [30] ai_busy */ +/* Note that a 1->0 transition in ai_full will set interrupt */ +/* (W): clear audio interrupt */ #define AI_STATUS_REG (AI_BASE_REG + 0x0C) /* @@ -576,23 +576,23 @@ #define AI_STATUS_FIFO_FULL (1 << 31) #define AI_STATUS_DMA_BUSY (1 << 30) -// AI DAC sample period register (W): [13:0] dac rate -// - vid_clock/(dperiod + 1) is the DAC sample rate -// - (dperiod + 1) >= 66 * (aclockhp + 1) must be true +/* AI DAC sample period register (W): [13:0] dac rate */ +/* - vid_clock/(dperiod + 1) is the DAC sample rate */ +/* - (dperiod + 1) >= 66 * (aclockhp + 1) must be true */ #define AI_DACRATE_REG (AI_BASE_REG + 0x10) -// DAC rate = video clock / audio frequency -// - DAC rate >= (66 * Bit rate) must be true -#define AI_MAX_DAC_RATE 16384 // 14-bit+1 +/* DAC rate = video clock / audio frequency */ +/* - DAC rate >= (66 * Bit rate) must be true */ +#define AI_MAX_DAC_RATE 16384 /* 14-bit+1 */ #define AI_MIN_DAC_RATE 132 -// AI bit rate (W): [3:0] bit rate (abus clock half period register - aclockhp) -// - vid_clock/(2 * (aclockhp + 1)) is the DAC clock rate -// - The abus clock stops if aclockhp is zero +/* AI bit rate (W): [3:0] bit rate (abus clock half period register - aclockhp) */ +/* - vid_clock/(2 * (aclockhp + 1)) is the DAC clock rate */ +/* - The abus clock stops if aclockhp is zero */ #define AI_BITRATE_REG (AI_BASE_REG + 0x14) -// Bit rate <= (DAC rate / 66) -#define AI_MAX_BIT_RATE 16 // 4-bit+1 +/* Bit rate <= (DAC rate / 66) */ +#define AI_MAX_BIT_RATE 16 /* 4-bit+1 */ #define AI_MIN_BIT_RATE 2 /* @@ -600,14 +600,14 @@ * max frequency = 
(video clock / min dac rate) * min frequency = (video clock / max dac rate) */ -#define AI_NTSC_MAX_FREQ 368000 // 368 KHz -#define AI_NTSC_MIN_FREQ 3000 // 3 KHz ~ 2971 Hz +#define AI_NTSC_MAX_FREQ 368000 /* 368 KHz */ +#define AI_NTSC_MIN_FREQ 3000 /* 3 KHz ~ 2971 Hz */ -#define AI_PAL_MAX_FREQ 376000 // 376 KHz -#define AI_PAL_MIN_FREQ 3050 // 3 KHz ~ 3031 Hz +#define AI_PAL_MAX_FREQ 376000 /* 376 KHz */ +#define AI_PAL_MIN_FREQ 3050 /* 3 KHz ~ 3031 Hz */ -#define AI_MPAL_MAX_FREQ 368000 // 368 KHz -#define AI_MPAL_MIN_FREQ 3000 // 3 KHz ~ 2968 Hz +#define AI_MPAL_MAX_FREQ 368000 /* 368 KHz */ +#define AI_MPAL_MIN_FREQ 3000 /* 3 KHz ~ 2968 Hz */ /** @@ -615,44 +615,44 @@ */ #define PI_BASE_REG 0x04600000 -// PI DRAM address (R/W): [23:0] starting RDRAM address +/* PI DRAM address (R/W): [23:0] starting RDRAM address */ #define PI_DRAM_ADDR_REG (PI_BASE_REG + 0x00) -// PI pbus (cartridge) address (R/W): [31:0] starting AD16 address +/* PI pbus (cartridge) address (R/W): [31:0] starting AD16 address */ #define PI_CART_ADDR_REG (PI_BASE_REG + 0x04) -// PI read length (R/W): [23:0] read data length +/* PI read length (R/W): [23:0] read data length */ #define PI_RD_LEN_REG (PI_BASE_REG + 0x08) -// PI write length (R/W): [23:0] write data length +/* PI write length (R/W): [23:0] write data length */ #define PI_WR_LEN_REG (PI_BASE_REG + 0x0C) -// PI status (R): [3] interrupt flag, [2] error, [1] IO busy, [0] DMA busy -// (W): [1] clear intr, [0] reset controller (and abort current op) +/* PI status (R): [3] interrupt flag, [2] error, [1] IO busy, [0] DMA busy */ +/* (W): [1] clear intr, [0] reset controller (and abort current op) */ #define PI_STATUS_REG (PI_BASE_REG + 0x10) -// PI dom1 latency (R/W): [7:0] domain 1 device latency +/* PI dom1 latency (R/W): [7:0] domain 1 device latency */ #define PI_BSD_DOM1_LAT_REG (PI_BASE_REG + 0x14) -// PI dom1 pulse width (R/W): [7:0] domain 1 device R/W strobe pulse width +/* PI dom1 pulse width (R/W): [7:0] domain 1 device R/W 
strobe pulse width */ #define PI_BSD_DOM1_PWD_REG (PI_BASE_REG + 0x18) -// PI dom1 page size (R/W): [3:0] domain 1 device page size +/* PI dom1 page size (R/W): [3:0] domain 1 device page size */ #define PI_BSD_DOM1_PGS_REG (PI_BASE_REG + 0x1C) -// PI dom1 release (R/W): [1:0] domain 1 device R/W release duration +/* PI dom1 release (R/W): [1:0] domain 1 device R/W release duration */ #define PI_BSD_DOM1_RLS_REG (PI_BASE_REG + 0x20) -// PI dom2 latency (R/W): [7:0] domain 2 device latency +/* PI dom2 latency (R/W): [7:0] domain 2 device latency */ #define PI_BSD_DOM2_LAT_REG (PI_BASE_REG + 0x24) -// PI dom2 pulse width (R/W): [7:0] domain 2 device R/W strobe pulse width +/* PI dom2 pulse width (R/W): [7:0] domain 2 device R/W strobe pulse width */ #define PI_BSD_DOM2_PWD_REG (PI_BASE_REG + 0x28) -// PI dom2 page size (R/W): [3:0] domain 2 device page size +/* PI dom2 page size (R/W): [3:0] domain 2 device page size */ #define PI_BSD_DOM2_PGS_REG (PI_BASE_REG + 0x2C) -// PI dom2 release (R/W): [1:0] domain 2 device R/W release duration +/* PI dom2 release (R/W): [1:0] domain 2 device R/W release duration */ #define PI_BSD_DOM2_RLS_REG (PI_BASE_REG + 0x30) #define PI_DOMAIN1_REG PI_BSD_DOM1_LAT_REG @@ -719,30 +719,30 @@ */ #define RI_BASE_REG 0x04700000 -// RI mode (R/W): [3] stop R active, [2] stop T active, [1:0] operating mode +/* RI mode (R/W): [3] stop R active, [2] stop T active, [1:0] operating mode */ #define RI_MODE_REG (RI_BASE_REG + 0x00) -// RI config (R/W): [6] current control enable, [5:0] current control input +/* RI config (R/W): [6] current control enable, [5:0] current control input */ #define RI_CONFIG_REG (RI_BASE_REG + 0x04) -// RI current load (W): [] any write updates current control register +/* RI current load (W): [] any write updates current control register */ #define RI_CURRENT_LOAD_REG (RI_BASE_REG + 0x08) -// RI select (R/W): [3:2] receive select, [1:0] transmit select +/* RI select (R/W): [3:2] receive select, [1:0] transmit select */ 
#define RI_SELECT_REG (RI_BASE_REG + 0x0C) -// RI refresh (R/W): [16] refresh bank, [17] refresh enable, [18] refresh optimize -// [7:0] clean refresh delay, [15:8] dirty refresh dela +/* RI refresh (R/W): [16] refresh bank, [17] refresh enable, [18] refresh optimize */ +/* [7:0] clean refresh delay, [15:8] dirty refresh delay */ #define RI_REFRESH_REG (RI_BASE_REG + 0x10) #define RI_COUNT_REG RI_REFRESH_REG -// RI latency (R/W): [3:0] DMA latency/overlap +/* RI latency (R/W): [3:0] DMA latency/overlap */ #define RI_LATENCY_REG (RI_BASE_REG + 0x14) -// RI error (R): [1] ack error, [0] nack error +/* RI error (R): [1] ack error, [0] nack error */ #define RI_RERROR_REG (RI_BASE_REG + 0x18) -// RI error (W): [] any write clears all error bits +/* RI error (W): [] any write clears all error bits */ #define RI_WERROR_REG (RI_BASE_REG + 0x1C) @@ -751,27 +751,27 @@ */ #define SI_BASE_REG 0x04800000 -// SI DRAM address (R/W): [23:0] starting RDRAM address +/* SI DRAM address (R/W): [23:0] starting RDRAM address */ #define SI_DRAM_ADDR_REG (SI_BASE_REG + 0x00) -// SI address read 64B (W): [] write begins a 64B DMA write PIF RAM -> RDRAM +/* SI address read 64B (W): [] write begins a 64B DMA write PIF RAM -> RDRAM */ #define SI_PIF_ADDR_RD64B_REG (SI_BASE_REG + 0x04) -// Address SI_BASE_REG + (0x08, 0x0C, 0x14) are reserved +/* Address SI_BASE_REG + (0x08, 0x0C, 0x14) are reserved */ -// SI address write 64B (W): [] write begins a 64B DMA read RDRAM -> PIF RAM */ +/* SI address write 64B (W): [] write begins a 64B DMA read RDRAM -> PIF RAM */ #define SI_PIF_ADDR_WR64B_REG (SI_BASE_REG + 0x10) -// SI status (R/W): [] any write clears interrupt +/* SI status (R/W): [] any write clears interrupt */ #define SI_STATUS_REG (SI_BASE_REG + 0x18) /* * SI_STATUS_REG: read bits */ -#define SI_STATUS_DMA_BUSY (1 << 0) // DMA in progress -#define SI_STATUS_RD_BUSY (1 << 1) // IO access in progress -#define SI_STATUS_DMA_ERROR (1 << 3) // Overlapping DMA requests -#define 
SI_STATUS_INTERRUPT (1 << 12) // Interrupt is set +#define SI_STATUS_DMA_BUSY (1 << 0) /* DMA in progress */ +#define SI_STATUS_RD_BUSY (1 << 1) /* IO access in progress */ +#define SI_STATUS_DMA_ERROR (1 << 3) /* Overlapping DMA requests */ +#define SI_STATUS_INTERRUPT (1 << 12) /* Interrupt is set */ /** @@ -780,13 +780,13 @@ #define GIO_BASE_REG 0x18000000 -// Game to Host Interrupt +/* Game to Host Interrupt */ #define GIO_GIO_INTR_REG (GIO_BASE_REG+0x000) -// Game to Host SYNC +/* Game to Host SYNC */ #define GIO_GIO_SYNC_REG (GIO_BASE_REG+0x400) -// Host to Game Interrupt +/* Host to Game Interrupt */ #define GIO_CART_INTR_REG (GIO_BASE_REG+0x800) diff --git a/include/ultra64/regdef.h b/include/ultra64/regdef.h new file mode 100644 index 0000000000..a3b2832180 --- /dev/null +++ b/include/ultra64/regdef.h @@ -0,0 +1,164 @@ +#ifndef REGDEF_H +#define REGDEF_H + +#ifdef __GNUC__ +#define _MIPS_SIM_ABI32 _ABIO32 +#define _MIPS_SIM_NABI32 _ABIN32 +#define _MIPS_SIM_ABI64 _ABI64 +#endif + +#if (_MIPS_SIM == _MIPS_SIM_ABI32) +#define zero $0 +#define AT $at +#define v0 $2 +#define v1 $3 +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define t0 $8 +#define t1 $9 +#define t2 $10 +#define t3 $11 +#define t4 $12 +#define ta0 $12 +#define t5 $13 +#define ta1 $13 +#define t6 $14 +#define ta2 $14 +#define t7 $15 +#define ta3 $15 +#define s0 $16 +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 +#define t9 $25 +#define jp $25 +#define k0 $26 +#define k1 $27 +#define gp $28 +#define sp $29 +#define fp $30 +#define s8 $30 +#define ra $31 +#endif + +#if (_MIPS_SIM == _MIPS_SIM_ABI64) +#define zero $0 +#define AT $at +#define v0 $2 +#define v1 $3 +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define a4 $8 +#define ta0 $8 +#define a5 $9 +#define ta1 $9 +#define a6 $10 +#define ta2 $10 +#define a7 $11 +#define ta3 $11 +#define t0 $12 +#define t1 $13 +#define t2 $14 +#define 
t3 $15 +#define s0 $16 +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 +#define t9 $25 +#define jp $25 +#define k0 $26 +#define k1 $27 +#define gp $28 +#define sp $29 +#define fp $30 +#define s8 $30 +#define ra $31 +#endif + +#if (_MIPS_SIM == _MIPS_SIM_ABI32) +#define fv0 $f0 +#define fv0f $f1 +#define fv1 $f2 +#define fv1f $f3 +#define fa0 $f12 +#define fa0f $f13 +#define fa1 $f14 +#define fa1f $f15 +#define ft0 $f4 +#define ft0f $f5 +#define ft1 $f6 +#define ft1f $f7 +#define ft2 $f8 +#define ft2f $f9 +#define ft3 $f10 +#define ft3f $f11 +#define ft4 $f16 +#define ft4f $f17 +#define ft5 $f18 +#define ft5f $f19 +#define fs0 $f20 +#define fs0f $f21 +#define fs1 $f22 +#define fs1f $f23 +#define fs2 $f24 +#define fs2f $f25 +#define fs3 $f26 +#define fs3f $f27 +#define fs4 $f28 +#define fs4f $f29 +#define fs5 $f30 +#define fs5f $f31 +#endif + +#if (_MIPS_SIM == _MIPS_SIM_ABI64) +#define fv0 $f0 +#define fv1 $f2 +#define fa0 $f12 +#define fa1 $f13 +#define fa2 $f14 +#define fa3 $f15 +#define fa4 $f16 +#define fa5 $f17 +#define fa6 $f18 +#define fa7 $f19 +#define ft0 $f4 +#define ft1 $f5 +#define ft2 $f6 +#define ft3 $f7 +#define ft4 $f8 +#define ft5 $f9 +#define ft6 $f10 +#define ft7 $f11 +#define ft8 $f20 +#define ft9 $f21 +#define ft10 $f22 +#define ft11 $f23 +#define ft12 $f1 +#define ft13 $f3 +#define fs0 $f24 +#define fs1 $f25 +#define fs2 $f26 +#define fs3 $f27 +#define fs4 $f28 +#define fs5 $f29 +#define fs6 $f30 +#define fs7 $f31 +#endif + +#define fcr31 $31 + +#endif diff --git a/include/ultra64/thread.h b/include/ultra64/thread.h index 813fefaa97..3c2733f150 100644 --- a/include/ultra64/thread.h +++ b/include/ultra64/thread.h @@ -73,7 +73,7 @@ typedef struct __OSThreadTail { #else -// OSThread struct member offsets +/* OSThread struct member offsets */ #define THREAD_NEXT 0x00 #define THREAD_PRI 0x04 diff --git a/src/boot/mio0.s b/src/boot/mio0.s index a3fb8eba26..a6c2eee6bc 
100644 --- a/src/boot/mio0.s +++ b/src/boot/mio0.s @@ -1,10 +1,7 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noreorder - -.section .text - -.balign 16 +.text /** * void Mio0_Decompress(void* src, void* dst); @@ -12,53 +9,56 @@ * Decompress Mio0 chunk */ LEAF(Mio0_Decompress) - lw $a3, 8($a0) // compressed offset - lw $t9, 0xC($a0) // uncompressed offset - lw $t8, 4($a0) // decompressed length - add $a3, $a3, $a0 // compressed start - add $t9, $t9, $a0 // uncompressed start - move $a2, $zero // 0 - addi $a0, $a0, 0x10 // move past header - add $t8, $t8, $a1 // dst + decompressed length = end + lw a3, 0x08(a0) /* compressed offset */ + lw t9, 0x0C(a0) /* uncompressed offset */ + lw t8, 0x04(a0) /* decompressed length */ + add a3, a3, a0 /* compressed start */ + add t9, t9, a0 /* uncompressed start */ + move a2, zero /* 0 */ + addi a0, a0, 0x10 /* move past header */ + add t8, t8, a1 /* dst + decompressed length = end */ mainloop: - bnez $a2, 1f - nop - lw $t0, ($a0) - li $a2, 32 - addi $a0, $a0, 4 + bnez a2, 1f + + lw t0, (a0) + li a2, 32 + addi a0, a0, 4 1: - slt $t1, $t0, $zero - beqz $t1, read_comp - nop - lb $t2, ($t9) // read 1 byte from uncompressed data - addi $t9, $t9, 1 // advance uncompressed start - addi $a1, $a1, 1 + slt t1, t0, zero + beqz t1, read_comp + + lb t2, (t9) /* read 1 byte from uncompressed data */ + sb t2, (a1) /* store uncompressed byte */ + addi t9, t9, 1 /* advance uncompressed start */ + addi a1, a1, 1 b next_iter - sb $t2, -1($a1) // store uncompressed byte + read_comp: - lhu $t2, ($a3) // read 2 bytes from compressed data - addi $a3, $a3, 2 // advance compressed start - srl $t3, $t2, 0xC - andi $t2, $t2, 0xFFF - beqz $t3, 3f - sub $t1, $a1, $t2 - addi $t3, $t3, 2 + lhu t2, (a3) /* read 2 bytes from compressed data */ + addi a3, a3, 2 /* advance compressed start */ + srl t3, t2, 0xC + andi t2, t2, 0xFFF + sub t1, a1, t2 + beqz t3, 3f + + addi t3, t3, 2 2: - lb $t2, -1($t1) - addi $t3, $t3, -1 - addi $t1, $t1, 1 - addi 
$a1, $a1, 1 - bnez $t3, 2b - sb $t2, -1($a1) + lb t2, -1(t1) + addi t3, t3, -1 + addi t1, t1, 1 + sb t2, (a1) + addi a1, a1, 1 + bnez t3, 2b + next_iter: - sll $t0, $t0, 1 - bne $a1, $t8, mainloop // continue until decompressed length is reached - addi $a2, $a2, -1 - jr $ra - nop + sll t0, t0, 1 + addi a2, a2, -1 + bne a1, t8, mainloop /* continue until decompressed length is reached */ + + jr ra 3: - lbu $t3, ($t9) - addi $t9, $t9, 1 + lbu t3, (t9) + addi t9, t9, 1 + addi t3, t3, 0x12 b 2b - addi $t3, $t3, 0x12 END(Mio0_Decompress) diff --git a/src/code/kanread.s b/src/code/kanread.s index a163f9ead7..fdf69ed9be 100644 --- a/src/code/kanread.s +++ b/src/code/kanread.s @@ -1,11 +1,7 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text /** * s32 Kanji_OffsetFromShiftJIS(s32 sjis); @@ -26,64 +22,57 @@ * @remark Almost identical to "LeoGetKadr" from libleo. */ LEAF(Kanji_OffsetFromShiftJIS) - // Characters with codepoints >= 0x8800 are kanji. Arrangement is regular, - // so convert index directly. - li $at, 0x8800 - slt $at, $a0, $at - bnez $at, .nonkanji - // 0xBC is number of glyphs in one block in the `kanji` file: - // 0x100 possible codepoints with the same byte1 - // - 0x40 unused at beginning - // - 1 unused at 0x7F - // - 3 unused at 0xFD, 0xFE, 0xFF - li $a2, 0xBC - // Get byte1 and adjust so starts at 0 - srl $a1, $a0, 8 - addi $a1, $a1, -0x88 - multu $a2, $a1 - // Get byte2 and adjust so starts at 0 - andi $a3, $a0, 0xFF - addi $a3, $a3, -0x40 - slti $at, $a3, (0x80 - 0x40) - mflo $a2 - // 0x__7F is always empty and elided in the file, so decrement if larger - bnezl $at, .kanji_lower_halfblock - mflo $a2 - addi $a3, $a3, -1 - mflo $a2 -.kanji_lower_halfblock: - addi $a3, $a3, 0x30A - add $a3, $a3, $a2 - jr $ra - sll $v0, $a3, 7 - // returns (0x30A + (adjusted byte2) + (adjusted byte1) * 0xBC) * FONT_CHAR_TEX_SIZE + /* Characters with codepoints >= 0x8800 are kanji. 
Arrangement is regular, */ + /* so convert index directly. */ + li a2, 0xBC + blt a0, 0x8800, .nonkanji + /* 0xBC is number of glyphs in one block in the `kanji` file: */ + /* 0x100 possible codepoints with the same byte1 */ + /* - 0x40 unused at beginning */ + /* - 1 unused at 0x7F */ + /* - 3 unused at 0xFD, 0xFE, 0xFF */ + /* Get byte1 and adjust so starts at 0 */ + srl a1, a0, 8 + addi a1, a1, -0x88 + /* Get byte2 and adjust so starts at 0 */ + andi a3, a0, 0xFF + addi a3, a3, -0x40 + mul a2, a2, a1 + blt a3, 0x40, .kanji_lower_halfblock -// Non-kanji are arranged with irregular gaps, use the lookup table. + /* 0x__7F is always empty and elided in the file, so decrement if larger */ + addi a3, a3, -1 +.kanji_lower_halfblock: + mflo a2 /* Unnecessary mflo: the pseudo-op `mul` already performs mflo */ + addi a3, a3, 0x30A + add a3, a3, a2 + /* returns (0x30A + (adjusted byte2) + (adjusted byte1) * 0xBC) * FONT_CHAR_TEX_SIZE */ + sll v0, a3, 7 + jr ra + +/* Non-kanji are arranged with irregular gaps, use the lookup table. 
*/ .nonkanji: - // Get byte1 and adjust so starts at 0 - srl $a1, $a0, 8 - addi $a1, $a1, -0x81 - multu $a2, $a1 - // Get byte2 and adjust so starts at 0 - andi $a3, $a0, 0xFF - addi $a3, $a3, -0x40 - slti $at, $a3, (0x80 - 0x40) - mflo $a2 - // 0x__7F is always empty and elided in the file, so decrement if larger - bnezl $at, .nonkanji_lower_halfblock - mflo $a2 - addi $a3, $a3, -1 - mflo $a2 + /* Get byte1 and adjust so starts at 0 */ + srl a1, a0, 8 + addi a1, a1, -0x81 + /* Get byte2 and adjust so starts at 0 */ + andi a3, a0, 0xFF + addi a3, a3, -0x40 + mul a2, a2, a1 + blt a3, 0x40, .nonkanji_lower_halfblock + + /* 0x__7F is always empty and elided in the file, so decrement if larger */ + addi a3, a3, -1 .nonkanji_lower_halfblock: - add $a3, $a3, $a2 - lui $a2, %hi(sNonKanjiIndices) - sll $a3, $a3, 1 - addiu $a2, %lo(sNonKanjiIndices) - add $a3, $a3, $a2 - lh $a2, ($a3) - jr $ra - sll $v0, $a2, 7 - // returns sNonKanjiIndices[(adjusted byte2) + (adjusted byte1) * 0xBC] * FONT_CHAR_TEX_SIZE + mflo a2 /* Unnecessary mflo: the pseudo-op `mul` already performs mflo */ + add a3, a3, a2 + sll a3, a3, 1 + la a2, sNonKanjiIndices + add a3, a3, a2 + lh a2, (a3) + /* returns sNonKanjiIndices[(adjusted byte2) + (adjusted byte1) * 0xBC] * FONT_CHAR_TEX_SIZE */ + sll v0, a2, 7 + jr ra END(Kanji_OffsetFromShiftJIS) /** @@ -116,7 +105,7 @@ END(Kanji_OffsetFromShiftJIS) * into blocks by high byte. 
*/ DATA(sNonKanjiIndices) -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x814_ */ .half 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F /* 0x815_ */ .half 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F /* 0x816_ */ .half 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F @@ -130,7 +119,7 @@ DATA(sNonKanjiIndices) /* 0x81E_ */ .half 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x81F_ */ .half 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0000, 0x0000, 0x0000, 0x0000, 0x0091 -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x824_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0092 /* 0x825_ */ .half 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x826_ */ .half 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB @@ -144,7 +133,7 @@ DATA(sNonKanjiIndices) /* 0x82E_ */ .half 0x0111, 0x0112, 0x0113, 0x0114, 0x0115, 0x0116, 0x0117, 0x0118, 0x0119, 0x011A, 0x011B, 0x011C, 0x011D, 0x011E, 0x011F, 0x0120 /* 0x82F_ */ .half 0x0121, 0x0122, 0x0123, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -// 
0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x834_ */ .half 0x0124, 0x0125, 0x0126, 0x0127, 0x0128, 0x0129, 0x012A, 0x012B, 0x012C, 0x012D, 0x012E, 0x012F, 0x0130, 0x0131, 0x0132, 0x0133 /* 0x835_ */ .half 0x0134, 0x0135, 0x0136, 0x0137, 0x0138, 0x0139, 0x013A, 0x013B, 0x013C, 0x013D, 0x013E, 0x013F, 0x0140, 0x0141, 0x0142, 0x0143 /* 0x836_ */ .half 0x0144, 0x0145, 0x0146, 0x0147, 0x0148, 0x0149, 0x014A, 0x014B, 0x014C, 0x014D, 0x014E, 0x014F, 0x0150, 0x0151, 0x0152, 0x0153 @@ -158,7 +147,7 @@ DATA(sNonKanjiIndices) /* 0x83E_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x83F_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x844_ */ .half 0x01AB, 0x01AC, 0x01AD, 0x01AE, 0x01AF, 0x01B0, 0x01B1, 0x01B2, 0x01B3, 0x01B4, 0x01B5, 0x01B6, 0x01B7, 0x01B8, 0x01B9, 0x01BA /* 0x845_ */ .half 0x01BB, 0x01BC, 0x01BD, 0x01BE, 0x01BF, 0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x01C4, 0x01C5, 0x01C6, 0x01C7, 0x01C8, 0x01C9, 0x01CA /* 0x846_ */ .half 0x01CB, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 @@ -172,7 +161,7 @@ DATA(sNonKanjiIndices) /* 0x84E_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x84F_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -// 0x___0 0x___1 0x___2 0x___3 
0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x854_ */ .half 0x020E, 0x020F, 0x0210, 0x0211, 0x0212, 0x0213, 0x0214, 0x0215, 0x0216, 0x0217, 0x0218, 0x0219, 0x021A, 0x021B, 0x021C, 0x021D /* 0x855_ */ .half 0x021E, 0x021F, 0x0220, 0x0221, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0222, 0x0223 /* 0x856_ */ .half 0x0224, 0x0225, 0x0226, 0x0227, 0x0228, 0x0229, 0x022A, 0x022B, 0x022C, 0x022D, 0x022E, 0x022F, 0x0230, 0x0231, 0x0232, 0x0233 @@ -186,7 +175,7 @@ DATA(sNonKanjiIndices) /* 0x85E_ */ .half 0x026C, 0x026D, 0x026E, 0x026F, 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, 0x0278, 0x0279, 0x027A, 0x027B /* 0x85F_ */ .half 0x027C, 0x027D, 0x027E, 0x027F, 0x0280, 0x0281, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x864_ */ .half 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, 0x0288, 0x0289, 0x028A, 0x028B, 0x028C, 0x028D, 0x028E, 0x028F, 0x0290, 0x0291 /* 0x865_ */ .half 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 0x0298, 0x0299, 0x029A, 0x029B, 0x029C, 0x029D, 0x029E, 0x029F, 0x0000, 0x0000 /* 0x866_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 @@ -200,7 +189,7 @@ DATA(sNonKanjiIndices) /* 0x86E_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x86F_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 
0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x874_ */ .half 0x02C0, 0x02C1, 0x02C2, 0x02C3, 0x02C4, 0x02C5, 0x02C6, 0x02C7, 0x02C8, 0x02C9, 0x02CA, 0x02CB, 0x02CC, 0x02CD, 0x02CE, 0x02CF /* 0x875_ */ .half 0x02D0, 0x02D1, 0x02D2, 0x02D3, 0x02D4, 0x02D5, 0x02D6, 0x02D7, 0x02D8, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x876_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 diff --git a/src/code/z_vimode.c b/src/code/z_vimode.c index 59fa5df2d0..0d2f5716fa 100644 --- a/src/code/z_vimode.c +++ b/src/code/z_vimode.c @@ -86,7 +86,7 @@ void ViMode_Configure(ViMode* viMode, s32 type, s32 tvType, s32 loRes, s32 antia yScaleHiOddField = modeF ? (loResInterlaced ? (F210(0.75) << 16) : (F210(0.5) << 16)) : 0; viMode->customViMode.type = type; - viMode->customViMode.comRegs.ctrl = VI_CTRL_PIXEL_ADV_3 | VI_CTRL_GAMMA_ON | VI_CTRL_GAMMA_DITHER_ON | + viMode->customViMode.comRegs.ctrl = VI_CTRL_PIXEL_ADV(3) | VI_CTRL_GAMMA_ON | VI_CTRL_GAMMA_DITHER_ON | (!loResDeinterlaced ? VI_CTRL_SERRATE_ON : 0) | (antialiasOn ? VI_CTRL_DIVOT_ON : 0) | (fb32Bit ? 
VI_CTRL_TYPE_32 : VI_CTRL_TYPE_16); diff --git a/src/libc/absf.s b/src/libc/absf.s index b423334614..58450e7791 100644 --- a/src/libc/absf.s +++ b/src/libc/absf.s @@ -1,15 +1,9 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -#if DEBUG_FEATURES -.set noreorder -#endif - -.section .text - -.balign 16 +.text LEAF(absf) - abs.s $f0, $f12 - jr $ra - nop + abs.s fv0, fa0 + jr ra END(absf) diff --git a/src/libc/sqrt.s b/src/libc/sqrt.s index 6ffb2b7085..1d69d3adb2 100644 --- a/src/libc/sqrt.s +++ b/src/libc/sqrt.s @@ -1,15 +1,9 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -#if DEBUG_FEATURES -.set noreorder -#endif - -.section .text - -.balign 16 +.text LEAF(sqrt) - sqrt.d $f0, $f12 - jr $ra - nop + sqrt.d fv0, fa0 + jr ra END(sqrt) diff --git a/src/libc64/fp.s b/src/libc64/fp.s index 117e7815e6..32c084ef56 100644 --- a/src/libc64/fp.s +++ b/src/libc64/fp.s @@ -1,12 +1,10 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noreorder - -.section .data - -.balign 16 +.data #if !PLATFORM_N64 + DATA(gPositiveInfinity) .word 0x7F800000 ENDDATA(gPositiveInfinity) @@ -34,150 +32,135 @@ ENDDATA(qNaN0x10000) DATA(sNaN0x3FFFFF) .word 0x7FFFFFFF ENDDATA(sNaN0x3FFFFF) + #endif -.section .text - -.balign 16 +.text LEAF(floorf) - floor.w.s $f12, $f12 - jr $ra - cvt.s.w $f0, $f12 + floor.w.s fa0, fa0 + cvt.s.w fv0, fa0 + jr ra END(floorf) LEAF(floor) - floor.w.d $f12, $f12 - jr $ra - cvt.d.w $f0, $f12 + floor.w.d fa0, fa0 + cvt.d.w fv0, fa0 + jr ra END(floor) LEAF(lfloorf) - floor.w.s $f4, $f12 - mfc1 $v0, $f4 - jr $ra - nop + floor.w.s ft0, fa0 + mfc1 v0, ft0 + jr ra END(lfloorf) LEAF(lfloor) - floor.w.d $f4, $f12 - mfc1 $v0, $f4 - jr $ra - nop + floor.w.d ft0, fa0 + mfc1 v0, ft0 + jr ra END(lfloor) LEAF(ceilf) - ceil.w.s $f12, $f12 - jr $ra - cvt.s.w $f0, $f12 + ceil.w.s fa0, fa0 + cvt.s.w fv0, fa0 + jr ra END(ceilf) LEAF(ceil) - ceil.w.d $f12, $f12 - jr $ra - cvt.d.w $f0, $f12 + ceil.w.d fa0, fa0 + cvt.d.w fv0, fa0 + jr ra END(ceil) LEAF(lceilf) - 
ceil.w.s $f4, $f12 - mfc1 $v0, $f4 - jr $ra - nop + ceil.w.s ft0, fa0 + mfc1 v0, ft0 + jr ra END(lceilf) LEAF(lceil) - ceil.w.d $f4, $f12 - mfc1 $v0, $f4 - jr $ra - nop + ceil.w.d ft0, fa0 + mfc1 v0, ft0 + jr ra END(lceil) LEAF(truncf) - trunc.w.s $f12, $f12 - jr $ra - cvt.s.w $f0, $f12 + trunc.w.s fa0, fa0 + cvt.s.w fv0, fa0 + jr ra END(truncf) LEAF(trunc) - trunc.w.d $f12, $f12 - jr $ra - cvt.d.w $f0, $f12 + trunc.w.d fa0, fa0 + cvt.d.w fv0, fa0 + jr ra END(trunc) LEAF(ltruncf) - trunc.w.s $f4, $f12 - mfc1 $v0, $f4 - jr $ra - nop + trunc.w.s ft0, fa0 + mfc1 v0, ft0 + jr ra END(ltruncf) LEAF(ltrunc) - trunc.w.d $f4, $f12 - mfc1 $v0, $f4 - jr $ra - nop + trunc.w.d ft0, fa0 + mfc1 v0, ft0 + jr ra END(ltrunc) LEAF(nearbyintf) - round.w.s $f12, $f12 - jr $ra - cvt.s.w $f0, $f12 + round.w.s fa0, fa0 + cvt.s.w fv0, fa0 + jr ra END(nearbyintf) LEAF(nearbyint) - round.w.d $f12, $f12 - jr $ra - cvt.d.w $f0, $f12 + round.w.d fa0, fa0 + cvt.d.w fv0, fa0 + jr ra END(nearbyint) LEAF(lnearbyintf) - round.w.s $f4, $f12 - mfc1 $v0, $f4 - jr $ra - nop + round.w.s ft0, fa0 + mfc1 v0, ft0 + jr ra END(lnearbyintf) LEAF(lnearbyint) - round.w.d $f4, $f12 - mfc1 $v0, $f4 - jr $ra - nop + round.w.d ft0, fa0 + mfc1 v0, ft0 + jr ra END(lnearbyint) LEAF(roundf) - li.s $f4, 0.5 - nop - add.s $f0, $f12, $f4 - floor.w.s $f0, $f0 - jr $ra - cvt.s.w $f0, $f0 + li.s ft0, 0.5 + add.s fv0, fa0, ft0 + floor.w.s fv0, fv0 + cvt.s.w fv0, fv0 + jr ra END(roundf) LEAF(round) - li.d $f4, 0.5 - nop - add.d $f0, $f12, $f4 - floor.w.d $f0, $f0 - jr $ra - cvt.d.w $f0, $f0 + li.d ft0, 0.5 + add.d fv0, fa0, ft0 + floor.w.d fv0, fv0 + cvt.d.w fv0, fv0 + jr ra END(round) LEAF(lroundf) - li.s $f4, 0.5 - nop - add.s $f0, $f12, $f4 - floor.w.s $f0, $f0 - mfc1 $v0, $f0 - jr $ra - nop + li.s ft0, 0.5 + add.s fv0, fa0, ft0 + floor.w.s fv0, fv0 + mfc1 v0, fv0 + jr ra END(lroundf) LEAF(lround) - li.d $f4, 0.5 - nop - add.d $f0, $f12, $f4 - floor.w.d $f0, $f0 - mfc1 $v0, $f0 - jr $ra - nop + li.d ft0, 0.5 + add.d fv0, 
fa0, ft0 + floor.w.d fv0, fv0 + mfc1 v0, fv0 + jr ra END(lround) diff --git a/src/libleo/api/getaadr.s b/src/libleo/api/getaadr.s index a36d27a5b3..95c1cfb14d 100644 --- a/src/libleo/api/getaadr.s +++ b/src/libleo/api/getaadr.s @@ -1,42 +1,33 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text LEAF(LeoGetAAdr) - bltz $a0, .out_of_range - slti $at, $a0, 0x908 - beqz $at, .out_of_range - nop - lui $v1, %hi(asc2tbl) - sll $t0, $a0, 2 - addiu $v1, %lo(asc2tbl) - add $t1, $t0, $v1 - lbu $t8, 2($t1) - lhu $t9, 0($t1) - li $at, 0x70000 - andi $t2, $t8, 0xF - addi $t3, $t2, 1 - sw $t3, ($a2) - lb $t0, 3($t1) - srl $t4, $t8, 4 - ori $at, $at, 0xEE80 - andi $t5, $t0, 1 - sll $t6, $t5, 4 - or $t7, $t6, $t4 - sw $t7, ($a1) - sll $v0, $t9, 1 - sra $v1, $t0, 1 - sw $v1, ($a3) - jr $ra - add $v0, $v0, $at + bltz a0, .out_of_range + bge a0, 0x908, .out_of_range + sll t0, a0, 2 + la v1, asc2tbl + add t1, t0, v1 + lbu t8, 2(t1) + lhu t9, 0(t1) + andi t2, t8, 0xF + addi t3, t2, 1 + sw t3, (a2) + lb t0, 3(t1) + srl t4, t8, 4 + andi t5, t0, 1 + sll t6, t5, 4 + or t7, t6, t4 + sw t7, (a1) + sll v0, t9, 1 + sra v1, t0, 1 + sw v1, (a3) + add v0, v0, 0x7EE80 + jr ra .out_of_range: - jr $ra - li $v0, -1 + li v0, -1 + jr ra END(LeoGetAAdr) DATA(asc2tbl) diff --git a/src/libleo/api/getaadr2.s b/src/libleo/api/getaadr2.s index e3ea0868bb..cac52cd90b 100644 --- a/src/libleo/api/getaadr2.s +++ b/src/libleo/api/getaadr2.s @@ -1,33 +1,25 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text LEAF(LeoGetAAdr2) - li $at, 0x10000 - srl $t8, $a0, 0xF - ori $at, $at, 0xFFFE - and $v1, $t8, $at - li $at, 0x70000 - ori $at, $at, 0xEE80 - srl $t2, $a0, 0x8 - add $v0, $v1, $at - andi $t3, $t2, 0xF - andi $t5, $a0, 0xFE - addi $t4, $t3, 1 - sll $t6, $t5, 0x18 - andi $t9, $a0, 0x1 - srl $v1, $a0, 0xC - sw $t4, ($a2) - sra $t7, $t6, 0x19 - sll $t8, $t9, 0x4 - andi $t1, 
$v1, 0xF - sw $t7, ($a3) - or $t2, $t1, $t8 - jr $ra - sw $t2, ($a1) + srl t8, a0, 15 + and v1, t8, 0x1FFFE + add v0, v1, 0x7EE80 + srl t2, a0, 8 + andi t3, t2, 0xF + andi t5, a0, 0xFE + addi t4, t3, 1 + sll t6, t5, 24 + andi t9, a0, 1 + srl v1, a0, 12 + sw t4, (a2) + sra t7, t6, 25 + sll t8, t9, 4 + andi t1, v1, 0xF + sw t7, (a3) + or t2, t1, t8 + sw t2, (a1) + jr ra END(LeoGetAAdr2) diff --git a/src/libleo/api/getkadr.s b/src/libleo/api/getkadr.s index 22d85cb593..d71719525a 100644 --- a/src/libleo/api/getkadr.s +++ b/src/libleo/api/getkadr.s @@ -1,11 +1,7 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text /** * int LeoGetKAdr(int sjis); @@ -22,74 +18,61 @@ * @return int Storage offset address into the N64 Disk Drive mask ROM. */ LEAF(LeoGetKAdr) - // Check if the codepoint is in the range 0x8140 to 0x9872. - li $at, 0x8140 - slt $at, $a0, $at - bnez $at, .out_of_range - li $at, 0x9873 - slt $at, $a0, $at - beqz $at, .out_of_range - // Characters with codepoints >= 0x8800 are kanji. Arrangement is regular, - // so convert index directly. - li $at, 0x8800 - slt $at, $a0, $at - bnez $at, .nonkanji - // 0xBC is number of glyphs in one block in the `kanji` file: - // 0x100 possible codepoints with the same byte1 - // - 0x40 unused at beginning - // - 1 unused at 0x7F - // - 3 unused at 0xFD, 0xFE, 0xFF - li $a2, 0xBC - // Get byte1 and adjust so starts at 0 - srl $a1, $a0, 8 - addi $a1, $a1, -0x88 - multu $a2, $a1 - // Get byte2 and adjust so starts at 0 - andi $a3, $a0, 0xFF - addi $a3, $a3, -0x40 - slti $at, $a3, (0x80 - 0x40) - mflo $a2 - // 0x__7F is always empty and elided in the file, so decrement if larger - bnezl $at, .kanji_lower_halfblock - mflo $a2 - addi $a3, $a3, -1 - mflo $a2 + /* Check if the codepoint is in the range 0x8140 to 0x9872. */ + blt a0, 0x8140, .out_of_range + bge a0, 0x9873, .out_of_range + /* Characters with codepoints >= 0x8800 are kanji. 
Arrangement is regular, + * so convert index directly. */ + li a2, 0xBC + blt a0, 0x8800, .nonkanji + /* 0xBC is number of glyphs in one block in the `kanji` file: + * 0x100 possible codepoints with the same byte1 + * - 0x40 unused at beginning + * - 1 unused at 0x7F + * - 3 unused at 0xFD, 0xFE, 0xFF */ + /* Get byte1 and adjust so starts at 0 */ + srl a1, a0, 8 + addi a1, a1, -0x88 + mul a2, a2, a1 + /* Get byte2 and adjust so starts at 0 */ + andi a3, a0, 0xFF + addi a3, a3, -0x40 + /* 0x__7F is always empty and elided in the file, so decrement if larger */ + blt a3, 0x40, .kanji_lower_halfblock + addi a3, a3, -1 .kanji_lower_halfblock: - addi $a3, $a3, 0x30A - add $a3, $a3, $a2 - jr $ra - sll $v0, $a3, 7 - // returns (0x30A + (adjusted byte2) + (adjusted byte1) * 0xBC) * FONT_CHAR_TEX_SIZE + mflo a2 + addi a3, a3, 0x30A + add a3, a3, a2 + sll v0, a3, 7 + jr ra + /* returns (0x30A + (adjusted byte2) + (adjusted byte1) * 0xBC) * FONT_CHAR_TEX_SIZE */ -// Non-kanji are arranged with irregular gaps, use the lookup table. +/* Non-kanji are arranged with irregular gaps, use the lookup table. 
*/ .nonkanji: - // Get byte1 and adjust so starts at 0 - srl $a1, $a0, 8 - addi $a1, $a1, -0x81 - multu $a2, $a1 - // Get byte2 and adjust so starts at 0 - andi $a3, $a0, 0xFF - addi $a3, $a3, -0x40 - slti $at, $a3, (0x80 - 0x40) - mflo $a2 - // 0x__7F is always empty and elided in the file, so decrement if larger - bnezl $at, .nonkanji_lower_halfblock - mflo $a2 - addi $a3, $a3, -1 - mflo $a2 + /* Get byte1 and adjust so starts at 0 */ + srl a1, a0, 8 + addi a1, a1, -0x81 + mul a2, a2, a1 + /* Get byte2 and adjust so starts at 0 */ + andi a3, a0, 0xFF + addi a3, a3, -0x40 + /* 0x__7F is always empty and elided in the file, so decrement if larger */ + blt a3, 0x40, .nonkanji_lower_halfblock + addi a3, a3, -1 .nonkanji_lower_halfblock: - add $a3, $a3, $a2 - lui $a2, %hi(kantbl) - sll $a3, $a3, 1 - addiu $a2, %lo(kantbl) - add $a3, $a3, $a2 - lh $a2, ($a3) - jr $ra - sll $v0, $a2, 7 - // returns kantbl[(adjusted byte2) + (adjusted byte1) * 0xBC] * FONT_CHAR_TEX_SIZE + mflo a2 + add a3, a3, a2 + sll a3, a3, 1 + la a2, kantbl + add a3, a3, a2 + lh a2, (a3) + sll v0, a2, 7 + jr ra + /* returns kantbl[(adjusted byte2) + (adjusted byte1) * 0xBC] * FONT_CHAR_TEX_SIZE */ .out_of_range: - jr $ra - li $v0, -1 + li v0, -1 + jr ra END(LeoGetKAdr) /** @@ -129,7 +112,7 @@ END(LeoGetKAdr) * into blocks by high byte. 
*/ DATA(kantbl) -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x814_ */ .half 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F /* 0x815_ */ .half 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F /* 0x816_ */ .half 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F @@ -143,7 +126,7 @@ DATA(kantbl) /* 0x81E_ */ .half 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x81F_ */ .half 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, 0x0090, 0x0000, 0x0000, 0x0000, 0x0000, 0x0091 -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x824_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0092 /* 0x825_ */ .half 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x826_ */ .half 0x009C, 0x009D, 0x009E, 0x009F, 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB @@ -157,7 +140,7 @@ DATA(kantbl) /* 0x82E_ */ .half 0x0111, 0x0112, 0x0113, 0x0114, 0x0115, 0x0116, 0x0117, 0x0118, 0x0119, 0x011A, 0x011B, 0x011C, 0x011D, 0x011E, 0x011F, 0x0120 /* 0x82F_ */ .half 0x0121, 0x0122, 0x0123, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -// 0x___0 0x___1 0x___2 0x___3 
0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x834_ */ .half 0x0124, 0x0125, 0x0126, 0x0127, 0x0128, 0x0129, 0x012A, 0x012B, 0x012C, 0x012D, 0x012E, 0x012F, 0x0130, 0x0131, 0x0132, 0x0133 /* 0x835_ */ .half 0x0134, 0x0135, 0x0136, 0x0137, 0x0138, 0x0139, 0x013A, 0x013B, 0x013C, 0x013D, 0x013E, 0x013F, 0x0140, 0x0141, 0x0142, 0x0143 /* 0x836_ */ .half 0x0144, 0x0145, 0x0146, 0x0147, 0x0148, 0x0149, 0x014A, 0x014B, 0x014C, 0x014D, 0x014E, 0x014F, 0x0150, 0x0151, 0x0152, 0x0153 @@ -171,7 +154,7 @@ DATA(kantbl) /* 0x83E_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x83F_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x844_ */ .half 0x01AB, 0x01AC, 0x01AD, 0x01AE, 0x01AF, 0x01B0, 0x01B1, 0x01B2, 0x01B3, 0x01B4, 0x01B5, 0x01B6, 0x01B7, 0x01B8, 0x01B9, 0x01BA /* 0x845_ */ .half 0x01BB, 0x01BC, 0x01BD, 0x01BE, 0x01BF, 0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x01C4, 0x01C5, 0x01C6, 0x01C7, 0x01C8, 0x01C9, 0x01CA /* 0x846_ */ .half 0x01CB, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 @@ -185,7 +168,7 @@ DATA(kantbl) /* 0x84E_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x84F_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 
0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x854_ */ .half 0x020E, 0x020F, 0x0210, 0x0211, 0x0212, 0x0213, 0x0214, 0x0215, 0x0216, 0x0217, 0x0218, 0x0219, 0x021A, 0x021B, 0x021C, 0x021D /* 0x855_ */ .half 0x021E, 0x021F, 0x0220, 0x0221, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0222, 0x0223 /* 0x856_ */ .half 0x0224, 0x0225, 0x0226, 0x0227, 0x0228, 0x0229, 0x022A, 0x022B, 0x022C, 0x022D, 0x022E, 0x022F, 0x0230, 0x0231, 0x0232, 0x0233 @@ -199,7 +182,7 @@ DATA(kantbl) /* 0x85E_ */ .half 0x026C, 0x026D, 0x026E, 0x026F, 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, 0x0278, 0x0279, 0x027A, 0x027B /* 0x85F_ */ .half 0x027C, 0x027D, 0x027E, 0x027F, 0x0280, 0x0281, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x864_ */ .half 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, 0x0288, 0x0289, 0x028A, 0x028B, 0x028C, 0x028D, 0x028E, 0x028F, 0x0290, 0x0291 /* 0x865_ */ .half 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 0x0298, 0x0299, 0x029A, 0x029B, 0x029C, 0x029D, 0x029E, 0x029F, 0x0000, 0x0000 /* 0x866_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 @@ -213,7 +196,7 @@ DATA(kantbl) /* 0x86E_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x86F_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -// 0x___0 0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F +/* 0x___0 
0x___1 0x___2 0x___3 0x___4 0x___5 0x___6 0x___7 0x___8 0x___9 0x___A 0x___B 0x___C 0x___D 0x___E 0x___F */ /* 0x874_ */ .half 0x02C0, 0x02C1, 0x02C2, 0x02C3, 0x02C4, 0x02C5, 0x02C6, 0x02C7, 0x02C8, 0x02C9, 0x02CA, 0x02CB, 0x02CC, 0x02CD, 0x02CE, 0x02CF /* 0x875_ */ .half 0x02D0, 0x02D1, 0x02D2, 0x02D3, 0x02D4, 0x02D5, 0x02D6, 0x02D7, 0x02D8, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* 0x876_ */ .half 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 diff --git a/src/libultra/gu/libm_vals.s b/src/libultra/gu/libm_vals.s index 18de1ae4f2..91f2cadb37 100644 --- a/src/libultra/gu/libm_vals.s +++ b/src/libultra/gu/libm_vals.s @@ -1,8 +1,6 @@ #include "ultra64/asm.h" -.section .rodata - -.balign 16 +.rdata DATA(__libm_qnan_f) .word 0x7F810000 diff --git a/src/libultra/io/vimodefpallan1.c b/src/libultra/io/vimodefpallan1.c index 37cb5ff557..5fb019cdfb 100644 --- a/src/libultra/io/vimodefpallan1.c +++ b/src/libultra/io/vimodefpallan1.c @@ -16,15 +16,15 @@ OSViMode osViModeFpalLan1 = { { // comRegs VI_CTRL_TYPE_16 | VI_CTRL_GAMMA_DITHER_ON | VI_CTRL_GAMMA_ON | VI_CTRL_DIVOT_ON | VI_CTRL_ANTIALIAS_MODE_1 | - VI_CTRL_PIXEL_ADV_3, // ctrl - WIDTH(320), // width - BURST(58, 30, 4, 69), // burst - VSYNC(625), // vSync - HSYNC(3177, 23), // hSync - LEAP(3183, 3181), // leap - HSTART(128, 768), // hStart - SCALE(2, 0), // xScale - VCURRENT(0), // vCurrent + VI_CTRL_PIXEL_ADV(3), // ctrl + WIDTH(320), // width + BURST(58, 30, 4, 69), // burst + VSYNC(625), // vSync + HSYNC(3177, 23), // hSync + LEAP(3183, 3181), // leap + HSTART(128, 768), // hStart + SCALE(2, 0), // xScale + VCURRENT(0), // vCurrent }, { // fldRegs { diff --git a/src/libultra/io/vimodempallan1.c b/src/libultra/io/vimodempallan1.c index bcc4273afb..23d53987b0 100644 --- a/src/libultra/io/vimodempallan1.c +++ b/src/libultra/io/vimodempallan1.c @@ -16,15 +16,15 @@ OSViMode osViModeMpalLan1 = { { // comRegs VI_CTRL_TYPE_16 | 
VI_CTRL_GAMMA_DITHER_ON | VI_CTRL_GAMMA_ON | VI_CTRL_DIVOT_ON | VI_CTRL_ANTIALIAS_MODE_1 | - VI_CTRL_PIXEL_ADV_3, // ctrl - WIDTH(320), // width - BURST(57, 30, 5, 70), // burst - VSYNC(525), // vSync - HSYNC(3089, 4), // hSync - LEAP(3097, 3098), // leap - HSTART(108, 748), // hStart - SCALE(2, 0), // xScale - VCURRENT(0), // vCurrent + VI_CTRL_PIXEL_ADV(3), // ctrl + WIDTH(320), // width + BURST(57, 30, 5, 70), // burst + VSYNC(525), // vSync + HSYNC(3089, 4), // hSync + LEAP(3097, 3098), // leap + HSTART(108, 748), // hStart + SCALE(2, 0), // xScale + VCURRENT(0), // vCurrent }, { // fldRegs { diff --git a/src/libultra/io/vimodentsclan1.c b/src/libultra/io/vimodentsclan1.c index a29d15129f..19a7160ac8 100644 --- a/src/libultra/io/vimodentsclan1.c +++ b/src/libultra/io/vimodentsclan1.c @@ -16,15 +16,15 @@ OSViMode osViModeNtscLan1 = { { // comRegs VI_CTRL_TYPE_16 | VI_CTRL_GAMMA_DITHER_ON | VI_CTRL_GAMMA_ON | VI_CTRL_DIVOT_ON | VI_CTRL_ANTIALIAS_MODE_1 | - VI_CTRL_PIXEL_ADV_3, // ctrl - WIDTH(320), // width - BURST(57, 34, 5, 62), // burst - VSYNC(525), // vSync - HSYNC(3093, 0), // hSync - LEAP(3093, 3093), // leap - HSTART(108, 748), // hStart - SCALE(2, 0), // xScale - VCURRENT(0), // vCurrent + VI_CTRL_PIXEL_ADV(3), // ctrl + WIDTH(320), // width + BURST(57, 34, 5, 62), // burst + VSYNC(525), // vSync + HSYNC(3093, 0), // hSync + LEAP(3093, 3093), // leap + HSTART(108, 748), // hStart + SCALE(2, 0), // xScale + VCURRENT(0), // vCurrent }, { // fldRegs { diff --git a/src/libultra/io/vimodepallan1.c b/src/libultra/io/vimodepallan1.c index d656dc4f94..7a8db603f4 100644 --- a/src/libultra/io/vimodepallan1.c +++ b/src/libultra/io/vimodepallan1.c @@ -16,15 +16,15 @@ OSViMode osViModePalLan1 = { { // comRegs VI_CTRL_TYPE_16 | VI_CTRL_GAMMA_DITHER_ON | VI_CTRL_GAMMA_ON | VI_CTRL_DIVOT_ON | VI_CTRL_ANTIALIAS_MODE_1 | - VI_CTRL_PIXEL_ADV_3, // ctrl - WIDTH(320), // width - BURST(58, 30, 4, 69), // burst - VSYNC(625), // vSync - HSYNC(3177, 23), // hSync - LEAP(3183, 
3181), // leap - HSTART(128, 768), // hStart - SCALE(2, 0), // xScale - VCURRENT(0), // vCurrent + VI_CTRL_PIXEL_ADV(3), // ctrl + WIDTH(320), // width + BURST(58, 30, 4, 69), // burst + VSYNC(625), // vSync + HSYNC(3177, 23), // hSync + LEAP(3183, 3181), // leap + HSTART(128, 768), // hStart + SCALE(2, 0), // xScale + VCURRENT(0), // vCurrent }, { // fldRegs { diff --git a/src/libultra/libc/bcmp.s b/src/libultra/libc/bcmp.s index df6692feb5..dcd5bc0023 100644 --- a/src/libultra/libc/bcmp.s +++ b/src/libultra/libc/bcmp.s @@ -1,93 +1,89 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text LEAF(bcmp) - slti $at, $a2, 0x10 - bnez $at, bytecmp - xor $v0, $a0, $a1 - andi $v0, $v0, 3 - bnez $v0, unaligncmp - negu $t8, $a0 - andi $t8, $t8, 3 - beqz $t8, wordcmp - subu $a2, $a2, $t8 - move $v0, $v1 - lwl $v0, ($a0) - lwl $v1, ($a1) - addu $a0, $a0, $t8 - addu $a1, $a1, $t8 - bne $v0, $v1, cmpne + xor v0, a0, a1 + blt a2, 0x10, bytecmp + + and v0, v0, 3 + negu t8, a0 + bnez v0, unaligncmp + + and t8, t8, 3 + subu a2, a2, t8 + beqz t8, wordcmp + + move v0, v1 + lwl v0, (a0) + lwl v1, (a1) + addu a0, a0, t8 + addu a1, a1, t8 + bne v0, v1, cmpne + wordcmp: - li $at, ~3 - and $a3, $a2, $at - beqz $a3, bytecmp - subu $a2, $a2, $a3 - addu $a3, $a3, $a0 - lw $v0, ($a0) + and a3, a2, ~3 + subu a2, a2, a3 + beqz a3, bytecmp + + addu a3, a3, a0 1: - lw $v1, ($a1) - addiu $a0, $a0, 4 - addiu $a1, $a1, 4 - bne $v0, $v1, cmpne - nop - bnel $a0, $a3, 1b - lw $v0, ($a0) + lw v0, (a0) + lw v1, (a1) + addu a0, a0, 4 + addu a1, a1, 4 + bne v0, v1, cmpne + bne a0, a3, 1b + b bytecmp - nop + unaligncmp: - negu $a3, $a1 - andi $a3, $a3, 3 - beqz $a3, partaligncmp - subu $a2, $a2, $a3 - addu $a3, $a3, $a0 - lbu $v0, ($a0) + negu a3, a1 + and a3, a3, 3 + subu a2, a2, a3 + beqz a3, partaligncmp + + addu a3, a3, a0 1: - lbu $v1, ($a1) - addiu $a0, $a0, 1 - addiu $a1, $a1, 1 - bne $v0, $v1, cmpne - nop - bnel $a0, $a3, 1b - lbu 
$v0, ($a0) + lbu v0, (a0) + lbu v1, (a1) + addu a0, a0, 1 + addu a1, a1, 1 + bne v0, v1, cmpne + bne a0, a3, 1b + partaligncmp: - li $at, ~3 - and $a3, $a2, $at - beqz $a3, bytecmp - subu $a2, $a2, $a3 - addu $a3, $a3, $a0 - lwl $v0, ($a0) + and a3, a2, ~3 + subu a2, a2, a3 + beqz a3, bytecmp + + addu a3, a3, a0 1: - lw $v1, ($a1) - lwr $v0, 3($a0) - addiu $a0, $a0, 4 - addiu $a1, $a1, 4 - bne $v0, $v1, cmpne - nop - bnel $a0, $a3, 1b - lwl $v0, ($a0) + lwl v0, (a0) + lw v1, (a1) + lwr v0, 3(a0) + addu a0, a0, 4 + addu a1, a1, 4 + bne v0, v1, cmpne + bne a0, a3, 1b + bytecmp: - blez $a2, cmpdone - addu $a3, $a2, $a0 - lbu $v0, ($a0) + addu a3, a2, a0 + blez a2, cmpdone 1: - lbu $v1, ($a1) - addiu $a0, $a0, 1 - addiu $a1, $a1, 1 - bne $v0, $v1, cmpne - nop - bnel $a0, $a3, 1b - lbu $v0, ($a0) + lbu v0, (a0) + lbu v1, (a1) + addu a0, a0, 1 + addu a1, a1, 1 + bne v0, v1, cmpne + bne a0, a3, 1b + cmpdone: - jr $ra - move $v0, $zero + move v0, zero + jr ra cmpne: - jr $ra - li $v0, 1 + li v0, 1 + jr ra END(bcmp) diff --git a/src/libultra/libc/bcopy.s b/src/libultra/libc/bcopy.s index e1240f113a..dcb6ad13c3 100644 --- a/src/libultra/libc/bcopy.s +++ b/src/libultra/libc/bcopy.s @@ -1,233 +1,211 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text LEAF(bcopy) - beqz $a2, ret - move $a3, $a1 - beq $a0, $a1, ret - slt $at, $a1, $a0 - bnezl $at, goforwards - slti $at, $a2, 0x10 - add $v0, $a0, $a2 - slt $at, $a1, $v0 - beql $at, $zero, goforwards - slti $at, $a2, 0x10 + move a3, a1 + beqz a2, ret + beq a0, a1, ret + blt a1, a0, goforwards + add v0, a0, a2 + bge a1, v0, goforwards b gobackwards - slti $at, $a2, 0x10 - slti $at, $a2, 0x10 + goforwards: - bnez $at, forwards_bytecopy - nop - andi $v0, $a0, 3 - andi $v1, $a1, 3 - beq $v0, $v1, forwalignable - nop + blt a2, 0x10, forwards_bytecopy + and v0, a0, 3 + and v1, a1, 3 + beq v0, v1, forwalignable + forwards_bytecopy: - beqz $a2, ret - nop - addu $v1, 
$a0, $a2 + beqz a2, ret + addu v1, a0, a2 99: - lb $v0, ($a0) - addiu $a0, $a0, 1 - addiu $a1, $a1, 1 - bne $a0, $v1, 99b - sb $v0, -1($a1) + lb v0, (a0) + addu a0, a0, 1 + sb v0, (a1) + addu a1, a1, 1 + bne a0, v1, 99b ret: - jr $ra - move $v0, $a3 + move v0, a3 + jr ra forwalignable: - beqz $v0, forwards_32 - li $at, 1 - beq $v0, $at, forw_copy3 - li $at, 2 - beql $v0, $at, forw_copy2 - lh $v0, ($a0) - lb $v0, ($a0) - addiu $a0, $a0, 1 - addiu $a1, $a1, 1 - addiu $a2, $a2, -1 + beqz v0, forwards_32 + beq v0, 1, forw_copy3 + beq v0, 2, forw_copy2 + + lb v0, (a0) + addu a0, a0, 1 + sb v0, (a1) + addu a1, a1, 1 + addu a2, a2, -1 b forwards_32 - sb $v0, -1($a1) - lh $v0, ($a0) + forw_copy2: - addiu $a0, $a0, 2 - addiu $a1, $a1, 2 - addiu $a2, $a2, -2 + lh v0, (a0) + addu a0, a0, 2 + sh v0, (a1) + addu a1, a1, 2 + addu a2, a2, -2 b forwards_32 - sh $v0, -2($a1) + forw_copy3: - lb $v0, ($a0) - lh $v1, 1($a0) - addiu $a0, $a0, 3 - addiu $a1, $a1, 3 - addiu $a2, $a2, -3 - sb $v0, -3($a1) - sh $v1, -2($a1) + lb v0, (a0) + lh v1, 1(a0) + addiu a0, a0, 3 + sb v0, (a1) + sh v1, 1(a1) + addiu a1, a1, 3 + addiu a2, a2, -3 forwards: forwards_32: - slti $at, $a2, 0x20 - bnezl $at, forwards_16_ - slti $at, $a2, 0x10 - lw $v0, ($a0) - lw $v1, 4($a0) - lw $t0, 8($a0) - lw $t1, 0xC($a0) - lw $t2, 0x10($a0) - lw $t3, 0x14($a0) - lw $t4, 0x18($a0) - lw $t5, 0x1C($a0) - addiu $a0, $a0, 0x20 - addiu $a1, $a1, 0x20 - addiu $a2, $a2, -0x20 - sw $v0, -0x20($a1) - sw $v1, -0x1C($a1) - sw $t0, -0x18($a1) - sw $t1, -0x14($a1) - sw $t2, -0x10($a1) - sw $t3, -0xC($a1) - sw $t4, -8($a1) + blt a2, 32, forwards_16 + lw v0, 0(a0) + lw v1, 4(a0) + lw t0, 8(a0) + lw t1, 12(a0) + lw t2, 16(a0) + lw t3, 20(a0) + lw t4, 24(a0) + lw t5, 28(a0) + addiu a0, a0, 32 + sw v0, 0(a1) + sw v1, 4(a1) + sw t0, 8(a1) + sw t1, 12(a1) + sw t2, 16(a1) + sw t3, 20(a1) + sw t4, 24(a1) + sw t5, 28(a1) + addiu a1, a1, 32 + addiu a2, a2, -32 b forwards_32 - sw $t5, -4($a1) + forwards_16: - slti $at, $a2, 0x10 
-forwards_16_: // fake label due to branch likely optimization - bnezl $at, forwards_4_ - slti $at, $a2, 4 - lw $v0, ($a0) - lw $v1, 4($a0) - lw $t0, 8($a0) - lw $t1, 0xC($a0) - addiu $a0, $a0, 0x10 - addiu $a1, $a1, 0x10 - addiu $a2, $a2, -0x10 - sw $v0, -0x10($a1) - sw $v1, -0xC($a1) - sw $t0, -8($a1) + blt a2, 16, forwards_4 + lw v0, 0(a0) + lw v1, 4(a0) + lw t0, 8(a0) + lw t1, 12(a0) + addiu a0, a0, 16 + sw v0, 0(a1) + sw v1, 4(a1) + sw t0, 8(a1) + sw t1, 12(a1) + addiu a1, a1, 16 + addiu a2, a2, -16 b forwards_16 - sw $t1, -4($a1) + forwards_4: - slti $at, $a2, 4 -forwards_4_: // fake label due to branch likely optimization - bnez $at, forwards_bytecopy - nop - lw $v0, ($a0) - addiu $a0, $a0, 4 - addiu $a1, $a1, 4 - addiu $a2, $a2, -4 + blt a2, 4, forwards_bytecopy + + lw v0, 0(a0) + addiu a0, a0, 4 + sw v0, 0(a1) + addiu a1, a1, 4 + addiu a2, a2, -4 b forwards_4 - sw $v0, -4($a1) - slti $at, $a2, 0x10 + gobackwards: - add $a0, $a0, $a2 - bnez $at, backwards_bytecopy - add $a1, $a1, $a2 - andi $v0, $a0, 3 - andi $v1, $a1, 3 - beq $v0, $v1, backalignable - nop + add a0, a0,a2 + add a1, a1,a2 + blt a2, 16, backwards_bytecopy + + andi v0, a0, 0x3 + andi v1, a1, 0x3 + beq v0, v1, backalignable + backwards_bytecopy: - beqz $a2, ret - nop - addiu $a0, $a0, -1 - addiu $a1, $a1, -1 - subu $v1, $a0, $a2 + beqz a2, ret + addiu a0, a0, -1 + addiu a1, a1, -1 + subu v1, a0,a2 99: - lb $v0, ($a0) - addiu $a0, $a0, -1 - addiu $a1, $a1, -1 - bne $a0, $v1, 99b - sb $v0, 1($a1) - jr $ra - move $v0, $a3 + lb v0, 0(a0) + addiu a0, a0, -1 + sb v0, 0(a1) + addiu a1, a1, -1 + bne a0, v1,99b + + move v0, a3 + jr ra backalignable: - beqz $v0, backwards_32 - li $at, 3 - beq $v0, $at, back_copy3 - li $at, 2 - beql $v0, $at, back_copy2 - lh $v0, -2($a0) - lb $v0, -1($a0) - addiu $a0, $a0, -1 - addiu $a1, $a1, -1 - addiu $a2, $a2, -1 - b backwards_32 - sb $v0, ($a1) - lh $v0, -2($a0) + beqz v0, backwards + beq v0, 3, back_copy3 + beq v0, 2, back_copy2 + lb v0, -1(a0) + addiu a0, a0, -1 + 
sb v0, -1(a1) + addiu a1, a1, -1 + addiu a2, a2, -1 + b backwards + back_copy2: - addiu $a0, $a0, -2 - addiu $a1, $a1, -2 - addiu $a2, $a2, -2 - b backwards_32 - sh $v0, ($a1) + lh v0, -2(a0) + addiu a0, a0, -2 + sh v0, -2(a1) + addiu a1, a1, -2 + addiu a2, a2, -2 + b backwards + back_copy3: - lb $v0, -1($a0) - lh $v1, -3($a0) - addiu $a0, $a0, -3 - addiu $a1, $a1, -3 - addiu $a2, $a2, -3 - sb $v0, 2($a1) - sh $v1, ($a1) + lb v0, -1(a0) + lh v1, -3(a0) + addiu a0, a0, -3 + sb v0, -1(a1) + sh v1, -3(a1) + addiu a1, a1, -3 + addiu a2, a2, -3 backwards: backwards_32: - slti $at, $a2, 0x20 - bnezl $at, backwards_16_ - slti $at, $a2, 0x10 - lw $v0, -4($a0) - lw $v1, -8($a0) - lw $t0, -0xc($a0) - lw $t1, -0x10($a0) - lw $t2, -0x14($a0) - lw $t3, -0x18($a0) - lw $t4, -0x1c($a0) - lw $t5, -0x20($a0) - addiu $a0, $a0, -0x20 - addiu $a1, $a1, -0x20 - addiu $a2, $a2, -0x20 - sw $v0, 0x1C($a1) - sw $v1, 0x18($a1) - sw $t0, 0x14($a1) - sw $t1, 0x10($a1) - sw $t2, 0xC($a1) - sw $t3, 8($a1) - sw $t4, 4($a1) + blt a2, 32, backwards_16 + lw v0, -4(a0) + lw v1, -8(a0) + lw t0, -12(a0) + lw t1, -16(a0) + lw t2, -20(a0) + lw t3, -24(a0) + lw t4, -28(a0) + lw t5, -32(a0) + addiu a0, a0, -32 + sw v0, -4(a1) + sw v1, -8(a1) + sw t0, -12(a1) + sw t1, -16(a1) + sw t2, -20(a1) + sw t3, -24(a1) + sw t4, -28(a1) + sw t5, -32(a1) + addiu a1, a1, -32 + addiu a2, a2, -32 b backwards_32 - sw $t5, ($a1) + backwards_16: - slti $at, $a2, 0x10 -backwards_16_: // fake label due to branch likely optimization - bnezl $at, backwards_4_ - slti $at, $a2, 4 - lw $v0, -4($a0) - lw $v1, -8($a0) - lw $t0, -0xC($a0) - lw $t1, -0x10($a0) - addiu $a0, $a0, -0x10 - addiu $a1, $a1, -0x10 - addiu $a2, $a2, -0x10 - sw $v0, 0xC($a1) - sw $v1, 8($a1) - sw $t0, 4($a1) + blt a2, 16, backwards_4 + lw v0, -4(a0) + lw v1, -8(a0) + lw t0, -12(a0) + lw t1, -16(a0) + addiu a0, a0, -16 + sw v0, -4(a1) + sw v1, -8(a1) + sw t0, -12(a1) + sw t1, -16(a1) + addiu a1, a1, -16 + addiu a2, a2, -16 b backwards_16 - sw $t1, ($a1) + 
backwards_4: - slti $at, $a2, 4 -backwards_4_: // fake label due to branch likely optimization - bnez $at, backwards_bytecopy - nop - lw $v0, -4($a0) - addiu $a0, $a0, -4 - addiu $a1, $a1, -4 - addiu $a2, $a2, -4 + blt a2, 4, backwards_bytecopy + lw v0, -4(a0) + addiu a0, a0, -4 + sw v0, -4(a1) + addiu a1, a1, -4 + addiu a2, a2, -4 b backwards_4 - sw $v0, ($a1) END(bcopy) diff --git a/src/libultra/libc/bzero.s b/src/libultra/libc/bzero.s index 5323a92d7d..9c6552f654 100644 --- a/src/libultra/libc/bzero.s +++ b/src/libultra/libc/bzero.s @@ -1,65 +1,59 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text LEAF(bzero) - slti $at, $a1, 0xC - bnez $at, bytezero - negu $v1, $a0 - andi $v1, $v1, 3 - beqz $v1, blkzero - subu $a1, $a1, $v1 - swl $zero, ($a0) - addu $a0, $a0, $v1 + negu v1, a0 + blt a1, 0xC, bytezero + + and v1, v1, 3 + subu a1, a1, v1 + beqz v1, blkzero + + swl zero, (a0) + addu a0, a0, v1 blkzero: - // align backwards to 0x20 - li $at, ~0x1F - and $a3, $a1, $at - // If the result is zero, the amount to zero is less than 0x20 bytes - beqz $a3, wordzero - subu $a1, $a1, $a3 - // zero in blocks of 0x20 at a time - addu $a3, $a3, $a0 + /* align backwards to 0x20 */ + and a3, a1, ~(0x20 - 1) + /* If the result is zero, the amount to zero is less than 0x20 bytes */ + subu a1, a1, a3 + beqz a3, wordzero + /* zero in blocks of 0x20 at a time */ + addu a3, a3, a0 1: - addiu $a0, $a0, 0x20 - sw $zero, -0x20($a0) - sw $zero, -0x1C($a0) - sw $zero, -0x18($a0) - sw $zero, -0x14($a0) - sw $zero, -0x10($a0) - sw $zero, -0xC($a0) - sw $zero, -8($a0) - bne $a0, $a3, 1b - sw $zero, -4($a0) + sw zero, 0(a0) + sw zero, 4(a0) + sw zero, 8(a0) + sw zero, 12(a0) + addiu a0, a0, 0x20 + sw zero, -16(a0) + sw zero, -12(a0) + sw zero, -8(a0) + sw zero, -4(a0) + bne a0, a3, 1b + wordzero: - // align backwards to 0x4 - li $at, ~3 - and $a3, $a1, $at - // If the result is zero, the amount to zero is less than 0x4 
bytes - beqz $a3, bytezero - subu $a1, $a1, $a3 - // zero one word at a time - addu $a3, $a3, $a0 + /* align backwards to 0x4 */ + and a3, a1, ~3 + /* If the result is zero, the amount to zero is less than 0x4 bytes */ + subu a1, a1, a3 + beqz a3, bytezero + /* zero one word at a time */ + addu a3, a3, a0 1: - addiu $a0, $a0, 4 - bne $a0, $a3, 1b - sw $zero, -4($a0) + addu a0, a0, 4 + sw zero, -4(a0) + bne a0, a3, 1b bytezero: - // test if nothing left to zero - blez $a1, zerodone - nop - // zero one byte at a time - addu $a1, $a1, $a0 + /* test if nothing left to zero */ + blez a1, zerodone + /* zero one byte at a time */ + addu a1, a1, a0 1: - addiu $a0, $a0, 1 - bne $a0, $a1, 1b - sb $zero, -1($a0) + sb zero, (a0) + addiu a0, a0, 1 + bne a0, a1, 1b zerodone: - jr $ra - nop + jr ra END(bzero) diff --git a/src/libultra/mgu/mtxf2l.s b/src/libultra/mgu/mtxf2l.s index edbb675a8e..1fada6b530 100644 --- a/src/libultra/mgu/mtxf2l.s +++ b/src/libultra/mgu/mtxf2l.s @@ -1,40 +1,36 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 32 +.text +.align 5 #define MTX_INTPART 0 #define MTX_FRACPART 0x20 LEAF(guMtxF2L) - li $at, 0x47800000 // 65536.0f - mtc1 $at, $f0 - li $t9, 0xFFFF0000 - addiu $t8, $a1, MTX_FRACPART + li.s fv0, 65536.0 + li t9, 0xFFFF0000 + addu t8, a1, MTX_FRACPART 1: - lwc1 $f4, ($a0) - lwc1 $f10, 4($a0) - addiu $a1, $a1, 4 - mul.s $f6, $f4, $f0 - addiu $a0, $a0, 8 - mul.s $f16, $f10, $f0 - trunc.w.s $f8, $f6 - trunc.w.s $f18, $f16 - mfc1 $t0, $f8 - mfc1 $t1, $f18 - and $t2, $t0, $t9 - sll $t5, $t0, 0x10 - srl $t3, $t1, 0x10 - andi $t6, $t1, 0xFFFF - or $t4, $t2, $t3 - or $t7, $t5, $t6 - sw $t4, (MTX_INTPART-4)($a1) - bne $a1, $t8, 1b - sw $t7, (MTX_FRACPART-4)($a1) - jr $ra - nop + lwc1 ft0, 0(a0) + lwc1 ft3, 4(a0) + mul.s ft1, ft0, fv0 + mul.s ft4, ft3, fv0 + trunc.w.s ft2, ft1 + trunc.w.s ft5, ft4 + mfc1 t0, ft2 + mfc1 t1, ft5 + srl t3, t1, 0x10 + and t6, t1, 0xFFFF + and t2, t0, t9 + sll t5, 
t0, 0x10 + or t4, t2, t3 + or t7, t5, t6 + sw t4, (MTX_INTPART)(a1) + sw t7, (MTX_FRACPART)(a1) + addu a0, a0, 8 + addu a1, a1, 4 + bne a1, t8, 1b + + jr ra END(guMtxF2L) diff --git a/src/libultra/mgu/mtxident.s b/src/libultra/mgu/mtxident.s index fb0e4c44ad..7f074a5526 100644 --- a/src/libultra/mgu/mtxident.s +++ b/src/libultra/mgu/mtxident.s @@ -1,29 +1,27 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noreorder - -.section .text - -.balign 32 +.text +.align 5 LEAF(guMtxIdent) - addi $t0, $zero, 1 - sll $t1, $t0, 0x10 - sw $t1, ($a0) - sw $zero, 4($a0) - sw $t0, 8($a0) - sw $zero, 0xc($a0) - sw $zero, 0x10($a0) - sw $t1, 0x14($a0) - sw $zero, 0x18($a0) - sw $t0, 0x1C($a0) - sw $zero, 0x20($a0) - sw $zero, 0x24($a0) - sw $zero, 0x28($a0) - sw $zero, 0x2c($a0) - sw $zero, 0x30($a0) - sw $zero, 0x34($a0) - sw $zero, 0x38($a0) - jr $ra - sw $zero, 0x3C($a0) + add t0, zero, 1 + sll t1, t0, 0x10 + sw t1, 0x00(a0) + sw zero, 0x04(a0) + sw t0, 0x08(a0) + sw zero, 0x0C(a0) + sw zero, 0x10(a0) + sw t1, 0x14(a0) + sw zero, 0x18(a0) + sw t0, 0x1C(a0) + sw zero, 0x20(a0) + sw zero, 0x24(a0) + sw zero, 0x28(a0) + sw zero, 0x2c(a0) + sw zero, 0x30(a0) + sw zero, 0x34(a0) + sw zero, 0x38(a0) + sw zero, 0x3C(a0) + jr ra END(guMtxIdent) diff --git a/src/libultra/mgu/mtxidentf.s b/src/libultra/mgu/mtxidentf.s index 583fc22184..490b79e4b4 100644 --- a/src/libultra/mgu/mtxidentf.s +++ b/src/libultra/mgu/mtxidentf.s @@ -1,28 +1,26 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noreorder - -.section .text - -.balign 32 +.text +.align 5 LEAF(guMtxIdentF) - li $t0, 0x3F800000 // 1.0f - sw $t0, ($a0) - sw $zero, 4($a0) - sw $zero, 8($a0) - sw $zero, 0xC($a0) - sw $zero, 0x10($a0) - sw $t0, 0x14($a0) - sw $zero, 0x18($a0) - sw $zero, 0x1C($a0) - sw $zero, 0x20($a0) - sw $zero, 0x24($a0) - sw $t0, 0x28($a0) - sw $zero, 0x2C($a0) - sw $zero, 0x30($a0) - sw $zero, 0x34($a0) - sw $zero, 0x38($a0) - jr $ra - sw $t0, 0x3C($a0) + li.s t0, 1.0 + sw t0, 0x00(a0) + sw 
zero, 0x04(a0) + sw zero, 0x08(a0) + sw zero, 0x0C(a0) + sw zero, 0x10(a0) + sw t0, 0x14(a0) + sw zero, 0x18(a0) + sw zero, 0x1C(a0) + sw zero, 0x20(a0) + sw zero, 0x24(a0) + sw t0, 0x28(a0) + sw zero, 0x2C(a0) + sw zero, 0x30(a0) + sw zero, 0x34(a0) + sw zero, 0x38(a0) + sw t0, 0x3C(a0) + jr ra END(guMtxIdentF) diff --git a/src/libultra/mgu/mtxl2f.s b/src/libultra/mgu/mtxl2f.s index 9298da37cc..b95e3aca7f 100644 --- a/src/libultra/mgu/mtxl2f.s +++ b/src/libultra/mgu/mtxl2f.s @@ -1,41 +1,43 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 32 +.text +.align 5 #define MTX_INTPART 0 #define MTX_FRACPART 0x20 +#define FIXTOF 0.0000152587890625 /* 1.0f / 65536.0f */ + LEAF(guMtxL2F) - li $at, 0x37800000 // 1.0f / 65536.0f - mtc1 $at, $f0 - li $t9, 0xFFFF0000 - addiu $t8, $a1, MTX_FRACPART + li.s fv0, FIXTOF + li t9, 0xFFFF0000 + addiu t8, a1, MTX_FRACPART 1: - lw $t0, MTX_INTPART($a1) - lw $t1, MTX_FRACPART($a1) - addiu $a1, $a1, 4 - and $t2, $t0, $t9 - srl $t3, $t1, 0x10 - or $t4, $t2, $t3 - mtc1 $t4, $f4 - sll $t5, $t0, 0x10 - andi $t6, $t1, 0xFFFF - or $t7, $t5, $t6 - cvt.s.w $f6, $f4 - mtc1 $t7, $f10 - addiu $a0, $a0, 8 - cvt.s.w $f16, $f10 - mul.s $f8, $f6, $f0 - nop - mul.s $f18, $f16, $f0 - swc1 $f8, -8($a0) - bne $a1, $t8, 1b - swc1 $f18, -4($a0) - jr $ra - nop + lw t0, MTX_INTPART(a1) + lw t1, MTX_FRACPART(a1) + + and t2, t0, t9 + srl t3, t1, 0x10 + or t4, t2, t3 + + sll t5, t0, 0x10 + and t6, t1, 0xFFFF + or t7, t5, t6 + + mtc1 t4, ft0 + cvt.s.w ft1, ft0 + mul.s ft2, ft1, fv0 + + mtc1 t7, ft3 + cvt.s.w ft4, ft3 + mul.s ft5, ft4, fv0 + + swc1 ft2, 0(a0) + swc1 ft5, 4(a0) + addu a0, a0, 8 + addu a1, a1, 4 + bne a1, t8, 1b + + jr ra END(guMtxL2F) diff --git a/src/libultra/mgu/normalize.s b/src/libultra/mgu/normalize.s index 7686317ea2..da10204d6c 100644 --- a/src/libultra/mgu/normalize.s +++ b/src/libultra/mgu/normalize.s @@ -1,31 +1,27 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set 
noreorder - -.section .text - -.balign 32 +.text +.align 5 LEAF(guNormalize) - lwc1 $f4, ($a0) - lwc1 $f6, ($a1) - lwc1 $f8, ($a2) - mul.s $f10, $f4, $f4 - li $t0, 0x3F800000 // 1.0f - mul.s $f16, $f6, $f6 - add.s $f18, $f10, $f16 - mul.s $f16, $f8, $f8 - add.s $f10, $f16, $f18 - mtc1 $t0, $f18 - sqrt.s $f16, $f10 - div.s $f10, $f18, $f16 - mul.s $f16, $f4, $f10 - nop - mul.s $f18, $f6, $f10 - nop - mul.s $f4, $f8, $f10 - swc1 $f16, ($a0) - swc1 $f18, ($a1) - jr $ra - swc1 $f4, ($a2) + lwc1 ft0, (a0) + lwc1 ft1, (a1) + lwc1 ft2, (a2) + li.s t0, 1.0 + mul.s ft3, ft0, ft0 + mul.s ft4, ft1, ft1 + add.s ft5, ft3, ft4 + mul.s ft4, ft2, ft2 + add.s ft3, ft4, ft5 + mtc1 t0, ft5 + sqrt.s ft4, ft3 + div.s ft3, ft5, ft4 + mul.s ft4, ft0, ft3 + mul.s ft5, ft1, ft3 + mul.s ft0, ft2, ft3 + swc1 ft4, (a0) + swc1 ft5, (a1) + swc1 ft0, (a2) + jr ra END(guNormalize) diff --git a/src/libultra/mgu/scale.s b/src/libultra/mgu/scale.s index 80199c0621..5661b940cc 100644 --- a/src/libultra/mgu/scale.s +++ b/src/libultra/mgu/scale.s @@ -1,52 +1,56 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 32 +.text +.align 5 LEAF(guScale) - li $at, 0x47800000 // 65536.0f - mtc1 $at, $f4 - mtc1 $a1, $f6 - sw $zero, 4($a0) - sw $zero, 0xC($a0) - mul.s $f8, $f6, $f4 - mtc1 $a2, $f6 - sw $zero, 0x10($a0) - sw $zero, 0x18($a0) - sw $zero, 0x24($a0) - sw $zero, 0x2C($a0) - sw $zero, 0x30($a0) - trunc.w.s $f10, $f8 - mul.s $f8, $f6, $f4 - mtc1 $a3, $f6 - sw $zero, 0x38($a0) - mfc1 $t1, $f10 - sw $zero, 0x3C($a0) - srl $t2, $t1, 0x10 - trunc.w.s $f10, $f8 - mul.s $f8, $f6, $f4 - sll $t0, $t2, 0x10 - sll $t2, $t1, 0x10 - mfc1 $t1, $f10 - sw $t0, ($a0) - sw $t2, 0x20($a0) - srl $t0, $t1, 0x10 - trunc.w.s $f10, $f8 - andi $t2, $t1, 0xFFFF - sw $t2, 0x28($a0) - sw $t0, 8($a0) - mfc1 $t1, $f10 - nop - srl $t2, $t1, 0x10 - sll $t0, $t2, 0x10 - sw $t0, 0x14($a0) - li $t0, 1 - sll $t2, $t1, 0x10 - sw $t2, 0x34($a0) - jr $ra - sw $t0, 0x1C($a0) + li.s 
ft0, 65536.0 + + mtc1 a1, ft1 + mul.s ft2, ft1, ft0 + trunc.w.s ft3, ft2 + mfc1 t1, ft3 + + srl t2, t1, 0x10 + sll t0, t2, 0x10 + sw t0, 0x00(a0) + sll t2, t1, 0x10 + sw t2, 0x20(a0) + + mtc1 a2, ft1 + mul.s ft2, ft1, ft0 + trunc.w.s ft3, ft2 + mfc1 t1, ft3 + + srl t0, t1, 0x10 + sw t0, 0x08(a0) + andi t2, t1, 0xFFFF + sw t2, 0x28(a0) + + mtc1 a3, ft1 + mul.s ft2, ft1, ft0 + trunc.w.s ft3, ft2 + mfc1 t1, ft3 + + srl t2, t1, 0x10 + sll t0, t2, 0x10 + sw t0, 0x14(a0) + sll t2, t1, 0x10 + sw t2, 0x34(a0) + + li t0, 1 + sw t0, 0x1C(a0) + + sw zero, 0x04(a0) + sw zero, 0x0C(a0) + sw zero, 0x10(a0) + sw zero, 0x18(a0) + sw zero, 0x24(a0) + sw zero, 0x2C(a0) + sw zero, 0x30(a0) + sw zero, 0x38(a0) + sw zero, 0x3C(a0) + + jr ra END(guScale) diff --git a/src/libultra/mgu/translate.s b/src/libultra/mgu/translate.s index d059ad5692..4ee630751f 100644 --- a/src/libultra/mgu/translate.s +++ b/src/libultra/mgu/translate.s @@ -1,61 +1,68 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" -.set noat -.set noreorder - -.section .text - -.balign 32 +.text +.align 5 LEAF(guTranslate) - li $at, 0x47800000 // 65536.0f - mtc1 $at, $f4 - mtc1 $a1, $f6 - sw $zero, ($a0) - sw $zero, 0x14($a0) - mul.s $f8, $f6, $f4 - mtc1 $a2, $f6 - sw $zero, 8($a0) - sw $zero, 4($a0) - sw $zero, 0xC($a0) - sw $zero, 0x10($a0) - sw $zero, 0x20($a0) - trunc.w.s $f10, $f8 - mul.s $f8, $f6, $f4 - mtc1 $a3, $f6 - sw $zero, 0x24($a0) - mfc1 $t1, $f10 - sw $zero, 0x28($a0) - sw $zero, 0x2C($a0) - srl $t2, $t1, 0x10 - trunc.w.s $f10, $f8 - mul.s $f8, $f6, $f4 - sll $t0, $t2, 0x10 - sw $zero, 0x30($a0) - mfc1 $t3, $f10 - sw $zero, 0x34($a0) - srl $t2, $t3, 0x10 - trunc.w.s $f10, $f8 - or $t0, $t0, $t2 - sw $t0, 0x18($a0) - sll $t0, $t1, 0x10 - sll $t2, $t3, 0x10 - mfc1 $t1, $f10 - srl $t2, $t2, 0x10 - or $t0, $t0, $t2 - sw $t0, 0x38($a0) - srl $t2, $t1, 0x10 - sll $t0, $t2, 0x10 - addiu $t0, $t0, 1 - sw $t0, 0x1C($a0) - lui $t0, 1 - ori $t0, $t0, 0 - sw $t0, ($a0) - sw $t0, 0x14($a0) - lui $t0, (0x00000001 >> 
16) - ori $t0, (0x00000001 & 0xFFFF) - sll $t2, $t1, 0x10 - sw $t2, 0x3C($a0) - jr $ra - sw $t0, 8($a0) + li.s ft0, 65536.0 + + mtc1 a1, ft1 + mul.s ft2, ft1, ft0 + trunc.w.s ft3, ft2 + mfc1 t1, ft3 + + mtc1 a2, ft1 + mul.s ft2, ft1, ft0 + trunc.w.s ft3, ft2 + mfc1 t3, ft3 + + srl t2, t1, 0x10 + sll t0, t2, 0x10 + srl t2, t3, 0x10 + or t0, t0, t2 + sw t0, 0x18(a0) + + sll t2, t3, 0x10 + sll t0, t1, 0x10 + srl t2, t2, 0x10 + or t0, t0, t2 + sw t0, 0x38(a0) + + mtc1 a3, ft1 + mul.s ft2, ft1, ft0 + trunc.w.s ft3, ft2 + mfc1 t1, ft3 + + srl t2, t1, 0x10 + sll t0, t2, 0x10 + addiu t0, t0, 1 + sw t0, 0x1C(a0) + + sll t2, t1, 0x10 + sw t2, 0x3C(a0) + + sw zero, 0x00(a0) + sw zero, 0x04(a0) + sw zero, 0x08(a0) + sw zero, 0x0C(a0) + sw zero, 0x10(a0) + sw zero, 0x14(a0) + sw zero, 0x20(a0) + sw zero, 0x24(a0) + sw zero, 0x28(a0) + sw zero, 0x2C(a0) + sw zero, 0x30(a0) + sw zero, 0x34(a0) + + lui t0, (0x00010000 >> 16) + ori t0, (0x00010000 & 0xFFFF) + sw t0, (a0) + sw t0, 0x14(a0) + + lui t0, (0x00000001 >> 16) + ori t0, (0x00000001 & 0xFFFF) + sw t0, 8(a0) + + jr ra END(guTranslate) diff --git a/src/libultra/os/exceptasm.s b/src/libultra/os/exceptasm.s index 3c55d66ac8..3c02267b9e 100644 --- a/src/libultra/os/exceptasm.s +++ b/src/libultra/os/exceptasm.s @@ -1,4 +1,8 @@ +#ifdef __GNUC__ +.set gp=64 +#endif #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" #include "ultra64/rcp.h" #include "ultra64/message.h" @@ -6,17 +10,12 @@ #include "ultra64/exception.h" #include "ultra64/version.h" -.set noat -.set noreorder -.set gp=64 - -.section .data - -.balign 16 +.data +.align 2 DATA(__osHwIntTable) .word 0, 0 - .word 0, 0 // cart + .word 0, 0 /* cart */ .word 0, 0 .word 0, 0 .word 0, 0 @@ -26,9 +25,8 @@ DATA(__osPiIntTable) .word 0, 0 ENDDATA(__osPiIntTable) -.section .rodata - -.balign 16 +.rdata +.align 2 __osIntOffTable: .byte 0x00 /* redispatch */ @@ -75,215 +73,197 @@ __osIntTable: .word IP7_Hdlr .word counter -.section .text - -.balign 16 
+.text /** - * The exception preamble is copied to the exception vectors at + * The 16-byte exception preamble is copied to the exception vectors at * UT_VEC, XUT_VEC, ECC_VEC, E_VEC, to direct execution to __osException */ LEAF(__osExceptionPreamble) - lui $k0, %hi(__osException) - addiu $k0, %lo(__osException) - jr $k0 - nop + la k0, __osException + jr k0 END(__osExceptionPreamble) LEAF(__osException) - // Load scratch space for thread saving - lui $k0, %hi(__osThreadSave) - addiu $k0, %lo(__osThreadSave) - // Save $at - sd $at, THREAD_AT($k0) - // Save sr - mfc0 $k1, C0_SR - sw $k1, THREAD_SR($k0) - // Disable interrupts - li $at, ~(SR_IE | SR_EXL) - and $k1, $k1, $at - mtc0 $k1, C0_SR - // Save some temp registers for use in the following - sd $t0, THREAD_T0($k0) - sd $t1, THREAD_T1($k0) - sd $t2, THREAD_T2($k0) - // Mark FPU as unused - sw $zero, THREAD_FP($k0) - // Left over from misplaced ifdef, immediately overwritten on next instruction - mfc0 $t0, C0_CAUSE +.set noat + /* Load scratch space for thread saving */ + la k0, __osThreadSave + /* Save at */ + sd AT, THREAD_AT(k0) +.set at + /* Save sr */ + MFC0( k1, C0_SR) + sw k1, THREAD_SR(k0) + /* Disable interrupts */ + and k1, k1, ~(SR_IE | SR_EXL) + MTC0( k1, C0_SR) + /* Save some temp registers for use in the following */ + sd t0, THREAD_T0(k0) + sd t1, THREAD_T1(k0) + sd t2, THREAD_T2(k0) + /* Mark FPU as unused */ + sw zero, THREAD_FP(k0) + /* Left over from misplaced ifdef, immediately overwritten on next instruction */ + MFC0( t0, C0_CAUSE) savecontext: - // Save the previously running thread's context to be restored when it resumes - move $t0, $k0 - lui $k0, %hi(__osRunningThread) - lw $k0, %lo(__osRunningThread)($k0) - ld $t1, THREAD_AT($t0) - sd $t1, THREAD_AT($k0) - ld $t1, THREAD_SR($t0) - sd $t1, THREAD_SR($k0) - ld $t1, THREAD_T0($t0) - sd $t1, THREAD_T0($k0) - ld $t1, THREAD_T1($t0) - sd $t1, THREAD_T1($k0) - ld $t1, THREAD_T2($t0) - sd $t1, THREAD_T2($k0) - lw $k1, THREAD_SR($k0) - mflo $t0 - 
sd $t0, THREAD_LO($k0) - mfhi $t0 - andi $t1, $k1, SR_IMASK - sd $v0, THREAD_V0($k0) - sd $v1, THREAD_V1($k0) - sd $a0, THREAD_A0($k0) - sd $a1, THREAD_A1($k0) - sd $a2, THREAD_A2($k0) - sd $a3, THREAD_A3($k0) - sd $t3, THREAD_T3($k0) - sd $t4, THREAD_T4($k0) - sd $t5, THREAD_T5($k0) - sd $t6, THREAD_T6($k0) - sd $t7, THREAD_T7($k0) - sd $s0, THREAD_S0($k0) - sd $s1, THREAD_S1($k0) - sd $s2, THREAD_S2($k0) - sd $s3, THREAD_S3($k0) - sd $s4, THREAD_S4($k0) - sd $s5, THREAD_S5($k0) - sd $s6, THREAD_S6($k0) - sd $s7, THREAD_S7($k0) - sd $t8, THREAD_T8($k0) - sd $t9, THREAD_T9($k0) - sd $gp, THREAD_GP($k0) - sd $sp, THREAD_SP($k0) - sd $fp, THREAD_S8($k0) - sd $ra, THREAD_RA($k0) - beqz $t1, savercp - sd $t0, THREAD_HI($k0) - // If any CPU interrupts are enabled in the previous thread's SR, bitwise-OR in the - // disabled CPU interrupts from the global interrupt mask. - // This is an attempt at reverting the effect of masking the thread's SR with the - // global interrupt mask. This is however broken, see comments for osSetIntMask. 
- lui $t0, %hi(__OSGlobalIntMask) - addiu $t0, %lo(__OSGlobalIntMask) - lw $t0, ($t0) - li $at, ~0 - xor $t2, $t0, $at - lui $at, ((~SR_IMASK) >> 0x10) & 0xFFFF - andi $t2, $t2, SR_IMASK - ori $at, (~SR_IMASK) & 0xFFFF - or $t4, $t1, $t2 - and $t3, $k1, $at - andi $t0, $t0, SR_IMASK - or $t3, $t3, $t4 - and $t1, $t1, $t0 - and $k1, $k1, $at - sw $t3, THREAD_SR($k0) - or $k1, $k1, $t1 +.set noreorder + /* Save the previously running thread's context to be restored when it resumes */ + move t0, k0 + lw k0, __osRunningThread + ld t1, THREAD_AT(t0) + sd t1, THREAD_AT(k0) + ld t1, THREAD_SR(t0) + sd t1, THREAD_SR(k0) + ld t1, THREAD_T0(t0) + sd t1, THREAD_T0(k0) + ld t1, THREAD_T1(t0) + sd t1, THREAD_T1(k0) + ld t1, THREAD_T2(t0) + sd t1, THREAD_T2(k0) +.set reorder + sd $2, THREAD_V0(k0) + sd $3, THREAD_V1(k0) + sd $4, THREAD_A0(k0) + sd $5, THREAD_A1(k0) + sd $6, THREAD_A2(k0) + sd $7, THREAD_A3(k0) + sd $11, THREAD_T3(k0) + sd $12, THREAD_T4(k0) + sd $13, THREAD_T5(k0) + sd $14, THREAD_T6(k0) + sd $15, THREAD_T7(k0) + sd $16, THREAD_S0(k0) + sd $17, THREAD_S1(k0) + sd $18, THREAD_S2(k0) + sd $19, THREAD_S3(k0) + sd $20, THREAD_S4(k0) + sd $21, THREAD_S5(k0) + sd $22, THREAD_S6(k0) + sd $23, THREAD_S7(k0) + sd $24, THREAD_T8(k0) + sd $25, THREAD_T9(k0) + sd $28, THREAD_GP(k0) + sd $29, THREAD_SP(k0) + sd $30, THREAD_S8(k0) + sd $31, THREAD_RA(k0) + mflo t0 + sd t0, THREAD_LO(k0) + mfhi t0 + sd t0, THREAD_HI(k0) + lw k1, THREAD_SR(k0) + andi t1, k1, SR_IMASK + beqz t1, savercp + /* If any CPU interrupts are enabled in the previous thread's SR, bitwise-OR in the */ + /* disabled CPU interrupts from the global interrupt mask. */ + /* This is an attempt at reverting the effect of masking the thread's SR with the */ + /* global interrupt mask. This is however broken, see comments for osSetIntMask. 
*/ + la t0, __OSGlobalIntMask + lw t0, (t0) + xor t2, t0, ~0 + andi t2, t2, SR_IMASK + or t4, t1, t2 + and t3, k1, ~SR_IMASK + andi t0, t0, SR_IMASK + or t3, t3, t4 + and t1, t1, t0 + and k1, k1, ~SR_IMASK + sw t3, THREAD_SR(k0) + or k1, k1, t1 savercp: - // Save the currently masked RCP interrupts. - lui $t1, %hi(PHYS_TO_K1(MI_INTR_MASK_REG)) - lw $t1, %lo(PHYS_TO_K1(MI_INTR_MASK_REG))($t1) - beqz $t1, endrcp - nop - // Similar to the above comment, but for RCP interrupt enable bits rather than CPU. - // This suffers from the same problem as above. - lui $t0, %hi(__OSGlobalIntMask) - addiu $t0, %lo(__OSGlobalIntMask) - lw $t0, ($t0) - lw $t4, THREAD_RCP($k0) - li $at, ~0 - srl $t0, $t0, RCP_IMASKSHIFT - xor $t0, $t0, $at - andi $t0, $t0, (RCP_IMASK >> RCP_IMASKSHIFT) - and $t0, $t0, $t4 - or $t1, $t1, $t0 + /* Save the currently masked RCP interrupts. */ + lw t1, PHYS_TO_K1(MI_INTR_MASK_REG) + beqz t1, endrcp + /* Similar to the above comment, but for RCP interrupt enable bits rather than CPU. */ + /* This suffers from the same problem as above. 
*/ + la t0, __OSGlobalIntMask + lw t0, (t0) + srl t0, t0, RCP_IMASKSHIFT + xor t0, t0, ~0 + andi t0, t0, (RCP_IMASK >> RCP_IMASKSHIFT) + lw t4, THREAD_RCP(k0) + and t0, t0, t4 + or t1, t1, t0 endrcp: - sw $t1, THREAD_RCP($k0) - mfc0 $t0, C0_EPC - sw $t0, THREAD_PC($k0) - lw $t0, THREAD_FP($k0) - beqz $t0, handle_interrupt - nop - // Save FP Registers if FPU was used by the thread - cfc1 $t0, C1_FPCSR + sw t1, THREAD_RCP(k0) + MFC0( t0, C0_EPC) + sw t0, THREAD_PC(k0) + lw t0, THREAD_FP(k0) + beqz t0, handle_interrupt + /* Save FP Registers if FPU was used by the thread */ +.set noreorder + cfc1 t0, C1_FPCSR nop - sw $t0, THREAD_FPCSR($k0) - sdc1 $f0, THREAD_FP0($k0) - sdc1 $f2, THREAD_FP2($k0) - sdc1 $f4, THREAD_FP4($k0) - sdc1 $f6, THREAD_FP6($k0) - sdc1 $f8, THREAD_FP8($k0) - sdc1 $f10, THREAD_FP10($k0) - sdc1 $f12, THREAD_FP12($k0) - sdc1 $f14, THREAD_FP14($k0) - sdc1 $f16, THREAD_FP16($k0) - sdc1 $f18, THREAD_FP18($k0) - sdc1 $f20, THREAD_FP20($k0) - sdc1 $f22, THREAD_FP22($k0) - sdc1 $f24, THREAD_FP24($k0) - sdc1 $f26, THREAD_FP26($k0) - sdc1 $f28, THREAD_FP28($k0) - sdc1 $f30, THREAD_FP30($k0) + sw t0, THREAD_FPCSR(k0) +.set reorder + sdc1 $f0, THREAD_FP0(k0) + sdc1 $f2, THREAD_FP2(k0) + sdc1 $f4, THREAD_FP4(k0) + sdc1 $f6, THREAD_FP6(k0) + sdc1 $f8, THREAD_FP8(k0) + sdc1 $f10, THREAD_FP10(k0) + sdc1 $f12, THREAD_FP12(k0) + sdc1 $f14, THREAD_FP14(k0) + sdc1 $f16, THREAD_FP16(k0) + sdc1 $f18, THREAD_FP18(k0) + sdc1 $f20, THREAD_FP20(k0) + sdc1 $f22, THREAD_FP22(k0) + sdc1 $f24, THREAD_FP24(k0) + sdc1 $f26, THREAD_FP26(k0) + sdc1 $f28, THREAD_FP28(k0) + sdc1 $f30, THREAD_FP30(k0) handle_interrupt: - // Determine the cause of the exception or interrupt and - // enter appropriate handling routine - mfc0 $t0, C0_CAUSE - sw $t0, THREAD_CAUSE($k0) - li $t1, OS_STATE_RUNNABLE - sh $t1, THREAD_STATE($k0) - andi $t1, $t0, CAUSE_EXCMASK - // Test for break exception - li $t2, EXC_BREAK - beq $t1, $t2, handle_break - nop - // Test for CpU (coprocessor unusable) exception 
- li $t2, EXC_CPU - beq $t1, $t2, handle_CpU - nop - // Test for interrupt, if it's not an interrupt, panic - li $t2, EXC_INT - bne $t1, $t2, panic - nop - and $s0, $k1, $t0 + /* Determine the cause of the exception or interrupt and */ + /* enter appropriate handling routine */ + MFC0( t0, C0_CAUSE) + sw t0, THREAD_CAUSE(k0) +label: + li t1, OS_STATE_RUNNABLE + sh t1, THREAD_STATE(k0) + andi t1, t0, CAUSE_EXCMASK + /* Test for break exception */ + li t2, EXC_BREAK + beq t1, t2, handle_break + /* Test for CpU (coprocessor unusable) exception */ + li t2, EXC_CPU + beq t1, t2, handle_CpU + /* Test for interrupt, if it's not an interrupt, panic */ + li t2, EXC_INT + bne t1, t2, panic + + and s0, k1, t0 next_interrupt: - // Handle external interrupt causes, using a jump table - // to enter into the appropriate handler - andi $t1, $s0, CAUSE_IPMASK - srl $t2, $t1, CAUSE_IPSHIFT + 4 - bnez $t2, 1f - nop - srl $t2, $t1, CAUSE_IPSHIFT - addi $t2, $t2, 0x10 + /* Handle external interrupt causes, using a jump table */ + /* to enter into the appropriate handler */ + andi t1, s0, CAUSE_IPMASK + srl t2, t1, CAUSE_IPSHIFT + 4 + bnez t2, 1f + + srl t2, t1, CAUSE_IPSHIFT + addi t2, t2, 0x10 1: - lui $at, %hi(__osIntOffTable) - addu $at, $at, $t2 - lbu $t2, %lo(__osIntOffTable)($at) - lui $at, %hi(__osIntTable) - addu $at, $at, $t2 - lw $t2, %lo(__osIntTable)($at) - jr $t2 - nop + lbu t2, __osIntOffTable(t2) + lw t2, __osIntTable(t2) + jr t2 /** * IP6 Interrupt * Only signalled by development hardware */ IP6_Hdlr: - // Mask out interrupt and continue - li $at, ~CAUSE_IP6 + /* Mask out interrupt and continue */ + and s0, s0, ~CAUSE_IP6 b next_interrupt - and $s0, $s0, $at /** * IP7 Interrupt * Only signalled by development hardware */ IP7_Hdlr: - // Mask out interrupt and continue - li $at, ~CAUSE_IP7 + /* Mask out interrupt and continue */ + and s0, s0, ~CAUSE_IP7 b next_interrupt - and $s0, $s0, $at /** * IP8/Counter Interrupt @@ -291,435 +271,368 @@ IP7_Hdlr: * cop0 compare 
register, this interrupt is triggered */ counter: - mfc0 $t1, C0_COMPARE - mtc0 $t1, C0_COMPARE - // Post counter message + MFC0( t1, C0_COMPARE) + MTC0( t1, C0_COMPARE) + /* Post counter message */ + li a0, OS_EVENT_COUNTER*8 jal send_mesg - li $a0, OS_EVENT_COUNTER*8 - // Mask out interrupt and continue - li $at, ~CAUSE_IP8 + /* Mask out interrupt and continue */ + and s0, s0, ~CAUSE_IP8 b next_interrupt - and $s0, $s0, $at /** * IP4/Cartridge Interrupt * Signalled by the N64 Disk Drive */ cart: - // Load cart callback set by __osSetHWIntrRoutine - lui $t1, %hi(__osHwIntTable) - addiu $t1, %lo(__osHwIntTable) - lw $t2, (OS_INTR_CART*HWINT_SIZE+HWINT_CALLBACK)($t1) - // Mask out interrupt - li $at, ~CAUSE_IP4 - and $s0, $s0, $at - // If the callback is NULL, handling is done - beqz $t2, send_cart_mesg - addi $t1, $t1, (OS_INTR_CART*HWINT_SIZE) - // Set up a stack and run the callback - jalr $t2 - lw $sp, HWINT_SP($t1) - beqz $v0, send_cart_mesg - nop - // Redispatch immediately if the callback returned nonzero + /* Load cart callback set by __osSetHWIntrRoutine */ + la t1, __osHwIntTable + lw t2, (OS_INTR_CART*HWINT_SIZE+HWINT_CALLBACK)(t1) + /* Mask out interrupt */ + and s0, s0, ~CAUSE_IP4 + /* If the callback is NULL, handling is done */ + addi t1, t1, (OS_INTR_CART*HWINT_SIZE) + beqz t2, send_cart_mesg + /* Set up a stack and run the callback */ + lw sp, HWINT_SP(t1) + jalr t2 + beqz v0, send_cart_mesg + /* Redispatch immediately if the callback returned nonzero */ b redispatch - nop send_cart_mesg: - // Post a cart event message + /* Post a cart event message */ + li a0, OS_EVENT_CART*8 jal send_mesg - li $a0, OS_EVENT_CART*8 - // Continue + /* Continue */ b next_interrupt - nop /** * IP3/RCP Interrupt * Signalled by the RCP for various reasons, described below */ rcp: - // Load the MI interrupts and mask with the RCP bits in the global interrupt mask - //! @bug this clobbers the t0 register which is expected to hold the value of the - //! 
C0_CAUSE register in the sw1 and sw2 handlers. If the sw1 or sw2 handler runs - //! after this, the interrupt will not be cleared properly. - lui $t0, %hi(__OSGlobalIntMask) - addiu $t0, %lo(__OSGlobalIntMask) - lw $t0, ($t0) - lui $s1, %hi(PHYS_TO_K1(MI_INTR_REG)) - lw $s1, %lo(PHYS_TO_K1(MI_INTR_REG))($s1) - srl $t0, $t0, RCP_IMASKSHIFT - and $s1, $s1, $t0 + /* Load the MI interrupts and mask with the RCP bits in the global interrupt mask */ + /*! @bug this clobbers the t0 register which is expected to hold the value of the */ + /*! C0_CAUSE register in the sw1 and sw2 handlers. If the sw1 or sw2 handler runs */ + /*! after this, the interrupt will not be cleared properly. */ + la t0, __OSGlobalIntMask + lw t0, (t0) + srl t0, t0, RCP_IMASKSHIFT + lw s1, PHYS_TO_K1(MI_INTR_REG) + and s1, s1, t0 /** * Signal Processor (SP) Interrupt */ -sp: - // Test for sp interrupt - andi $t1, $s1, MI_INTR_SP - beqz $t1, vi - nop - // Test for yielded or done signals in particular - lui $t4, %hi(PHYS_TO_K1(SP_STATUS_REG)) - lw $t4, %lo(PHYS_TO_K1(SP_STATUS_REG))($t4) - li $t1, (SP_CLR_INTR | SP_CLR_SIG3) - lui $at, %hi(PHYS_TO_K1(SP_STATUS_REG)) - andi $t4, $t4, (SP_STATUS_YIELDED | SP_STATUS_TASKDONE) - // Mask out SP interrupt - andi $s1, $s1, (MI_INTR_SI | MI_INTR_AI | MI_INTR_VI | MI_INTR_PI | MI_INTR_DP) - beqz $t4, sp_other_break - // Clear interrupt and signal 3 - sw $t1, %lo(PHYS_TO_K1(SP_STATUS_REG))($at) - // Post an SP event message + /* Test for sp interrupt */ + andi t1, s1, MI_INTR_SP + beqz t1, vi + /* Test for yielded or done signals in particular */ + lw t4, PHYS_TO_K1(SP_STATUS_REG) + li t1, (SP_CLR_INTR | SP_CLR_SIG3) + andi t4, t4, (SP_STATUS_YIELDED | SP_STATUS_TASKDONE) + /* Mask out SP interrupt */ + andi s1, s1, (MI_INTR_SI | MI_INTR_AI | MI_INTR_VI | MI_INTR_PI | MI_INTR_DP) + sw t1, PHYS_TO_K1(SP_STATUS_REG) + beqz t4, sp_other_break + /* Clear interrupt and signal 3 */ + /* Post an SP event message */ + li a0, OS_EVENT_SP*8 jal send_mesg - li $a0, 
OS_EVENT_SP*8 - beqz $s1, NoMoreRcpInts - nop - // Step over sp_other_break handler + beqz s1, NoMoreRcpInts + /* Step over sp_other_break handler */ b vi - nop sp_other_break: - // An sp signal that is not due to yielding or task completion, such as - // an sp breakpoint. Post a different event message + /* An sp signal that is not due to yielding or task completion, such as */ + /* an sp breakpoint. Post a different event message */ + li a0, OS_EVENT_SP_BREAK*8 jal send_mesg - li $a0, OS_EVENT_SP_BREAK*8 - beqz $s1, NoMoreRcpInts - nop + beqz s1, NoMoreRcpInts /** * Video Interface (VI) Interrupt */ vi: - // Test for vi interrupt - andi $t1, $s1, MI_INTR_VI - beqz $t1, ai - lui $at, %hi(PHYS_TO_K1(VI_CURRENT_REG)) - // Mask out vi interrupt - andi $s1, $s1, (MI_INTR_SP | MI_INTR_SI | MI_INTR_AI | MI_INTR_PI | MI_INTR_DP) - // Clear interrupt - sw $zero, %lo(PHYS_TO_K1(VI_CURRENT_REG))($at) - // Post vi event message + /* Test for vi interrupt */ + andi t1, s1, MI_INTR_VI + beqz t1, ai + /* Mask out vi interrupt */ + andi s1, s1, (MI_INTR_SP | MI_INTR_SI | MI_INTR_AI | MI_INTR_PI | MI_INTR_DP) + /* Clear interrupt */ + sw zero, PHYS_TO_K1(VI_CURRENT_REG) + /* Post vi event message */ + li a0, OS_EVENT_VI*8 jal send_mesg - li $a0, OS_EVENT_VI*8 - beqz $s1, NoMoreRcpInts - nop + beqz s1, NoMoreRcpInts /** * Audio Interface (AI) Interrupt */ ai: - // Test for ai interrupt - andi $t1, $s1, MI_INTR_AI - beqz $t1, si - nop - li $t1, 1 - lui $at, %hi(PHYS_TO_K1(AI_STATUS_REG)) - // Mask out ai interrupt - andi $s1, $s1, (MI_INTR_SP | MI_INTR_SI | MI_INTR_VI | MI_INTR_PI | MI_INTR_DP) - // Clear interrupt - sw $t1, %lo(PHYS_TO_K1(AI_STATUS_REG))($at) - // Post ai event message + /* Test for ai interrupt */ + andi t1, s1, MI_INTR_AI + beqz t1, si + + /* Mask out ai interrupt */ + andi s1, s1, (MI_INTR_SP | MI_INTR_SI | MI_INTR_VI | MI_INTR_PI | MI_INTR_DP) + /* Clear interrupt */ + li t1, 1 + sw t1, PHYS_TO_K1(AI_STATUS_REG) + /* Post ai event message */ + li a0, 
OS_EVENT_AI*8 jal send_mesg - li $a0, OS_EVENT_AI*8 - beqz $s1, NoMoreRcpInts - nop + beqz s1, NoMoreRcpInts /** * Serial Interface (SI) Interrupt */ si: - // Test for si interrupt - andi $t1, $s1, MI_INTR_SI - beqz $t1, pi - lui $at, %hi(PHYS_TO_K1(SI_STATUS_REG)) - // Mask out si interrupt - andi $s1, $s1, (MI_INTR_SP | MI_INTR_AI | MI_INTR_VI | MI_INTR_PI | MI_INTR_DP) - // Clear interrupt - sw $zero, %lo(PHYS_TO_K1(SI_STATUS_REG))($at) - // Post si event message + /* Test for si interrupt */ + andi t1, s1, MI_INTR_SI + beqz t1, pi + + /* Mask out si interrupt */ + andi s1, s1, (MI_INTR_SP | MI_INTR_AI | MI_INTR_VI | MI_INTR_PI | MI_INTR_DP) + /* Clear interrupt */ + sw zero, PHYS_TO_K1(SI_STATUS_REG) + /* Post si event message */ + li a0, OS_EVENT_SI*8 jal send_mesg - li $a0, OS_EVENT_SI*8 - beqz $s1, NoMoreRcpInts - nop + beqz s1, NoMoreRcpInts /** * Parallel Interface (PI) Interrupt */ pi: - // Test for pi interrupt - andi $t1, $s1, MI_INTR_PI - beqz $t1, dp - nop + /* Test for pi interrupt */ + andi t1, s1, MI_INTR_PI + beqz t1, dp + + /* Clear and mask the interrupt */ #if LIBULTRA_VERSION < LIBULTRA_VERSION_J - // Clear interrupt and mask out pi interrupt - li $t1, PI_STATUS_CLR_INTR - lui $at, %hi(PHYS_TO_K1(PI_STATUS_REG)) - andi $s1, $s1, (MI_INTR_SP | MI_INTR_SI | MI_INTR_AI | MI_INTR_VI | MI_INTR_DP) - sw $t1, %lo(PHYS_TO_K1(PI_STATUS_REG))($at) + li t1, PI_STATUS_CLR_INTR + andi s1, s1, (MI_INTR_SP | MI_INTR_SI | MI_INTR_AI | MI_INTR_VI | MI_INTR_DP) + sw t1, PHYS_TO_K1(PI_STATUS_REG) #else - // Clear interrupt - li $t1, PI_STATUS_CLR_INTR - lui $at, %hi(PHYS_TO_K1(PI_STATUS_REG)) - sw $t1, %lo(PHYS_TO_K1(PI_STATUS_REG))($at) - // Load pi callback - lui $t1, %hi(__osPiIntTable) - addiu $t1, %lo(__osPiIntTable) - lw $t2, HWINT_CALLBACK($t1) - // Mask out pi interrupt - andi $s1, $s1, (MI_INTR_SP | MI_INTR_SI | MI_INTR_AI | MI_INTR_VI | MI_INTR_DP) - // Skip callback if NULL - beqz $t2, no_pi_callback - nop - // Set up a stack and run the callback - lw 
$sp, HWINT_SP($t1) - jalr $t2 - move $a0, $v0 - // If the callback returns non-zero, don't post a pi event message - bnez $v0, skip_pi_mesg - nop + /* Clear the interrupt */ + li t1, PI_STATUS_CLR_INTR + sw t1, PHYS_TO_K1(PI_STATUS_REG) + /* Load pi callback */ + la t1, __osPiIntTable + lw t2, HWINT_CALLBACK(t1) + /* Mask out pi interrupt */ + andi s1, s1, (MI_INTR_SP | MI_INTR_SI | MI_INTR_AI | MI_INTR_VI | MI_INTR_DP) + /* Skip callback if NULL */ + beqz t2, no_pi_callback + /* Set up a stack and run the callback */ + lw sp, HWINT_SP(t1) + move a0, v0 + jalr t2 + /* If the callback returns non-zero, don't post a pi event message */ + bnez v0, skip_pi_mesg #endif no_pi_callback: - // Post pi event message + /* Post pi event message */ + li a0, OS_EVENT_PI*8 jal send_mesg - li $a0, OS_EVENT_PI*8 skip_pi_mesg: - beqz $s1, NoMoreRcpInts - nop + beqz s1, NoMoreRcpInts /** * Display Processor (DP) Interrupt */ dp: - // Test for dp interrupt - andi $t1, $s1, MI_INTR_DP - beqz $t1, NoMoreRcpInts - nop - // Clear dp interrupt - li $t1, MI_CLR_DP_INTR - lui $at, %hi(PHYS_TO_K1(MI_INIT_MODE_REG)) - // Mask out dp interrupt - andi $s1, $s1, (MI_INTR_SP | MI_INTR_SI | MI_INTR_AI | MI_INTR_VI | MI_INTR_PI) - sw $t1, %lo(PHYS_TO_K1(MI_INIT_MODE_REG))($at) - // Post dp event message + /* Test for dp interrupt */ + andi t1, s1, MI_INTR_DP + beqz t1, NoMoreRcpInts + + /* Mask out dp interrupt */ + andi s1, s1, (MI_INTR_SP | MI_INTR_SI | MI_INTR_AI | MI_INTR_VI | MI_INTR_PI) + /* Clear dp interrupt */ + li t1, MI_CLR_DP_INTR + sw t1, PHYS_TO_K1(MI_INIT_MODE_REG) + /* Post dp event message */ + li a0, OS_EVENT_DP*8 jal send_mesg - li $a0, OS_EVENT_DP*8 NoMoreRcpInts: - // Mask out interrupt and continue - li $at, ~CAUSE_IP3 + /* Mask out interrupt and continue */ + and s0, s0, ~CAUSE_IP3 b next_interrupt - and $s0, $s0, $at /** * IP5/PreNMI Interrupt * Reset button has been pressed */ prenmi: - // Disable IP5/PreNMI interrupt for the previously running thread - lw $k1, 
THREAD_SR($k0) - li $at, ~SR_IBIT5 - lui $t1, %hi(__osShutdown) - and $k1, $k1, $at - sw $k1, THREAD_SR($k0) - addiu $t1, %lo(__osShutdown) - // Test __osShutdown for first PreNMI event - lw $t2, ($t1) - beqz $t2, firstnmi - li $at, ~CAUSE_IP5 - // Mask out interrupt and redispatch immediately + /* Disable IP5/PreNMI interrupt for the previously running thread */ + lw k1, THREAD_SR(k0) + and k1, k1, ~SR_IBIT5 + sw k1, THREAD_SR(k0) + /* Test __osShutdown for first PreNMI event */ + la t1, __osShutdown + lw t2, (t1) + beqz t2, firstnmi + /* Mask out interrupt and redispatch immediately */ + and s0, s0, ~CAUSE_IP5 b redispatch - and $s0, $s0, $at firstnmi: - // Set __osShutdown - li $t2, 1 - sw $t2, ($t1) - // Post a PreNMI event message + /* Set __osShutdown */ + li t2, 1 + sw t2, (t1) + /* Post a PreNMI event message */ + li a0, OS_EVENT_PRENMI*8 jal send_mesg - li $a0, OS_EVENT_PRENMI*8 - // Mask out and disable IP5/PreNMI interrupt for the highest priority thread - lui $t2, %hi(__osRunQueue) - lw $t2, %lo(__osRunQueue)($t2) - li $at, ~SR_IBIT5 - and $s0, $s0, $at - lw $k1, THREAD_SR($t2) - and $k1, $k1, $at - // Redispatch immediately + /* Mask out and disable IP5/PreNMI interrupt for the highest priority thread */ + lw t2, __osRunQueue + and s0, s0, ~SR_IBIT5 + lw k1, THREAD_SR(t2) + and k1, k1, ~SR_IBIT5 + sw k1, THREAD_SR(t2) + /* Redispatch immediately */ b redispatch - sw $k1, THREAD_SR($t2) sw2: - // Mask out interrupt - li $at, ~CAUSE_SW2 - and $t0, $t0, $at - mtc0 $t0, C0_CAUSE - // Post sw2 event message + /* Mask out interrupt */ + and t0, t0, ~CAUSE_SW2 + MTC0( t0, C0_CAUSE) + /* Post sw2 event message */ + li a0, OS_EVENT_SW2*8 jal send_mesg - li $a0, OS_EVENT_SW2*8 - li $at, ~CAUSE_SW2 - // Mask out interrupt and continue + /* Mask out interrupt and continue */ + and s0, s0, ~CAUSE_SW2 b next_interrupt - and $s0, $s0, $at sw1: - // Mask out interrupt - li $at, ~CAUSE_SW1 - and $t0, $t0, $at - mtc0 $t0, C0_CAUSE - // Post sw1 event message + /* Mask 
out interrupt */ + and t0, t0, ~CAUSE_SW1 + MTC0( t0, C0_CAUSE) + /* Post sw1 event message */ + li a0, OS_EVENT_SW1*8 jal send_mesg - li $a0, OS_EVENT_SW1*8 - li $at, ~CAUSE_SW1 - // Mask out interrupt and continue + /* Mask out interrupt and continue */ + and s0, s0, ~CAUSE_SW1 b next_interrupt - and $s0, $s0, $at handle_break: - // Set last thread as having hit a break exception - li $t1, OS_FLAG_CPU_BREAK - sh $t1, THREAD_FLAGS($k0) - // Post a cpu break event message + /* Set last thread as having hit a break exception */ + li t1, OS_FLAG_CPU_BREAK + sh t1, THREAD_FLAGS(k0) + /* Post a cpu break event message */ + li a0, OS_EVENT_CPU_BREAK*8 jal send_mesg - li $a0, OS_EVENT_CPU_BREAK*8 - // Redispatch + /* Redispatch */ b redispatch - nop redispatch: - lui $t2, %hi(__osRunQueue) - lw $t2, %lo(__osRunQueue)($t2) - // Get priority of previously running thread - lw $t1, THREAD_PRI($k0) - // Get highest priority from waiting threads - lw $t3, THREAD_PRI($t2) - slt $at, $t1, $t3 - beqz $at, enqueueRunning - nop - // The previously running thread is no longer the highest priority, - // enqueue it to the run queue to wait its turn again - lui $a0, %hi(__osRunQueue) - move $a1, $k0 + lw t2, __osRunQueue + /* Get priority of previously running thread */ + lw t1, THREAD_PRI(k0) + /* Get highest priority from waiting threads */ + lw t3, THREAD_PRI(t2) + bge t1, t3, enqueueRunning + /* The previously running thread is no longer the highest priority, */ + /* enqueue it to the run queue to wait its turn again */ + move a1, k0 + la a0, __osRunQueue jal __osEnqueueThread - addiu $a0, $a0, %lo(__osRunQueue) + j __osDispatchThread - nop /** * Resume the previously running thread by placing it at the top of * the run queue and dispatching it */ enqueueRunning: - lui $t1, %hi(__osRunQueue) - addiu $t1, $t1, %lo(__osRunQueue) - lw $t2, ($t1) - sw $t2, THREAD_NEXT($k0) + la t1, __osRunQueue + lw t2, (t1) + sw t2, THREAD_NEXT(k0) + sw k0, (t1) j __osDispatchThread - sw $k0, ($t1) 
/** * Unhandled exceptions & interrupts end up here, * trap to software by posting a fault message */ panic: - // Mark the thread as having faulted - lui $at, %hi(__osFaultedThread) - sw $k0, %lo(__osFaultedThread)($at) - li $t1, OS_STATE_STOPPED - sh $t1, THREAD_STATE($k0) - li $t1, OS_FLAG_FAULT - sh $t1, THREAD_FLAGS($k0) - // Save C0_BADVADDR - mfc0 $t2, C0_BADVADDR - sw $t2, THREAD_BADVADDR($k0) - // Post the fault message + /* Mark the thread as having faulted */ + sw k0, __osFaultedThread + li t1, OS_STATE_STOPPED + sh t1, THREAD_STATE(k0) + li t1, OS_FLAG_FAULT + sh t1, THREAD_FLAGS(k0) + /* Save C0_BADVADDR */ + MFC0( t2, C0_BADVADDR) + sw t2, THREAD_BADVADDR(k0) + /* Post the fault message */ + li a0, OS_EVENT_FAULT*8 jal send_mesg - li $a0, OS_EVENT_FAULT*8 - // Dispatch next thread + /* Dispatch next thread */ j __osDispatchThread - nop /** * Handles posting event messages to the listening message queue, if there is one */ send_mesg: - // Load pointer to listening message queue - lui $t2, %hi(__osEventStateTab) - addiu $t2, %lo(__osEventStateTab) - addu $t2, $t2, $a0 - lw $t1, ($t2) - // Save return address - move $s2, $ra - // If there is no listening message queue, done - beqz $t1, send_done - nop - // Test if the message queue is full, if so don't post the message - lw $t3, MQ_VALIDCOUNT($t1) - lw $t4, MQ_MSGCOUNT($t1) - slt $at, $t3, $t4 - beqz $at, send_done - nop - // Add validcount to first and modulo with msgcount - lw $t5, MQ_FIRST($t1) - addu $t5, $t5, $t3 - // Modulo - div $zero, $t5, $t4 - bnez $t4, 1f - nop - break 7 // div0 -1: - li $at, -1 - bne $t4, $at, 2f - li $at, -0x80000000 - bne $t5, $at, 2f - nop - break 6 // overflow -2: - // End Modulo - lw $t4, MQ_MSG($t1) - mfhi $t5 - sll $t5, $t5, 2 - addu $t4, $t4, $t5 - // Fetch the message to post - lw $t5, 4($t2) - addiu $t2, $t3, 1 - // Post the message to the message queue - sw $t5, ($t4) - // Increment the validCount - sw $t2, MQ_VALIDCOUNT($t1) - // If there was a thread blocked on 
this message queue, - // wake it up - lw $t2, MQ_MTQUEUE($t1) - lw $t3, ($t2) - beqz $t3, send_done - nop + /* Load pointer to listening message queue */ + la t2, __osEventStateTab + addu t2, t2, a0 + lw t1, (t2) + /* Save return address */ + move s2, ra + /* If there is no listening message queue, done */ + beqz t1, send_done + + /* Test if the message queue is full, if so don't post the message */ + lw t3, MQ_VALIDCOUNT(t1) + lw t4, MQ_MSGCOUNT(t1) + bge t3, t4, send_done + + /* Add validcount to first and modulo with msgcount */ + lw t5, MQ_FIRST(t1) + addu t5, t5, t3 + rem t5, t5, t4 + lw t4, MQ_MSG(t1) + sll t5, t5, 2 + addu t4, t4, t5 + /* Fetch the message to post */ + lw t5, 4(t2) + addiu t2, t3, 1 + /* Post the message to the message queue */ + sw t5, (t4) + /* Increment the validCount */ + sw t2, MQ_VALIDCOUNT(t1) + /* If there was a thread blocked on this message queue, */ + /* wake it up */ + lw t2, MQ_MTQUEUE(t1) + lw t3, (t2) + beqz t3, send_done + move a0, t1 jal __osPopThread - move $a0, $t1 - move $t2, $v0 - lui $a0, %hi(__osRunQueue) - move $a1, $t2 + move t2, v0 + move a1, t2 + la a0, __osRunQueue jal __osEnqueueThread - addiu $a0, %lo(__osRunQueue) send_done: - jr $s2 - nop + jr s2 /** * Handle coprocessor unusable exception */ handle_CpU: - li $at, CAUSE_CEMASK - and $t1, $t0, $at - srl $t1, $t1, CAUSE_CESHIFT - li $t2, 1 // if not coprocessor 1, panic - bne $t1, $t2, panic - nop - // Mark cop1 as usable for previous thread - lw $k1, THREAD_SR($k0) - li $at, SR_CU1 - li $t1, 1 - or $k1, $k1, $at - sw $t1, THREAD_FP($k0) + and t1, t0, CAUSE_CEMASK + srl t1, t1, CAUSE_CESHIFT + li t2, 1 /* if not coprocessor 1, panic */ + bne t1, t2, panic + /* Mark cop1 as usable for previous thread */ + lw k1, THREAD_SR(k0) + li t1, 1 + or k1, k1, SR_CU1 + sw t1, THREAD_FP(k0) + sw k1, THREAD_SR(k0) b enqueueRunning - sw $k1, THREAD_SR($k0) END(__osException) /** @@ -731,83 +644,71 @@ END(__osException) * unblocked runnable thread. 
*/ LEAF(__osEnqueueAndYield) - lui $a1, %hi(__osRunningThread) - lw $a1, %lo(__osRunningThread)($a1) - // Save SR - mfc0 $t0, C0_SR - lw $k1, THREAD_FP($a1) - ori $t0, $t0, SR_EXL - sw $t0, THREAD_SR($a1) - // Save callee-saved registers - sd $s0, THREAD_S0($a1) - sd $s1, THREAD_S1($a1) - sd $s2, THREAD_S2($a1) - sd $s3, THREAD_S3($a1) - sd $s4, THREAD_S4($a1) - sd $s5, THREAD_S5($a1) - sd $s6, THREAD_S6($a1) - sd $s7, THREAD_S7($a1) - sd $gp, THREAD_GP($a1) - sd $sp, THREAD_SP($a1) - sd $fp, THREAD_S8($a1) - sd $ra, THREAD_RA($a1) - // Save FPU callee-saved registers if the current thread has used the FPU - beqz $k1, 1f - sw $ra, THREAD_PC($a1) - cfc1 $k1, C1_FPCSR - sdc1 $f20, THREAD_FP20($a1) - sdc1 $f22, THREAD_FP22($a1) - sdc1 $f24, THREAD_FP24($a1) - sdc1 $f26, THREAD_FP26($a1) - sdc1 $f28, THREAD_FP28($a1) - sdc1 $f30, THREAD_FP30($a1) - sw $k1, THREAD_FPCSR($a1) + lw a1, __osRunningThread + /* Save SR */ + MFC0( t0, C0_SR) + lw k1, THREAD_FP(a1) + ori t0, t0, SR_EXL + sw t0, THREAD_SR(a1) + /* Save callee-saved registers */ + sd s0, THREAD_S0(a1) + sd s1, THREAD_S1(a1) + sd s2, THREAD_S2(a1) + sd s3, THREAD_S3(a1) + sd s4, THREAD_S4(a1) + sd s5, THREAD_S5(a1) + sd s6, THREAD_S6(a1) + sd s7, THREAD_S7(a1) + sd gp, THREAD_GP(a1) + sd sp, THREAD_SP(a1) + sd fp, THREAD_S8(a1) + sd ra, THREAD_RA(a1) + sw ra, THREAD_PC(a1) + /* Save FPU callee-saved registers if the current thread has used the FPU */ + beqz k1, 1f + cfc1 k1, C1_FPCSR + sdc1 $f20, THREAD_FP20(a1) + sdc1 $f22, THREAD_FP22(a1) + sdc1 $f24, THREAD_FP24(a1) + sdc1 $f26, THREAD_FP26(a1) + sdc1 $f28, THREAD_FP28(a1) + sdc1 $f30, THREAD_FP30(a1) + sw k1, THREAD_FPCSR(a1) 1: - lw $k1, THREAD_SR($a1) - andi $t1, $k1, SR_IMASK - beqz $t1, 2f - nop - // This code does the same thing as the block just above the `savercp` label. - // See the comment there for more about this. 
- lui $t0, %hi(__OSGlobalIntMask) - addiu $t0, %lo(__OSGlobalIntMask) - lw $t0, ($t0) - li $at, ~0 - xor $t0, $t0, $at - lui $at, ((~SR_IMASK) >> 0x10) & 0xFFFF - andi $t0, $t0, SR_IMASK - ori $at, (~SR_IMASK) & 0xFFFF - or $t1, $t1, $t0 - and $k1, $k1, $at - or $k1, $k1, $t1 - sw $k1, THREAD_SR($a1) + lw k1, THREAD_SR(a1) + andi t1, k1, SR_IMASK + beqz t1, 2f + /* This code does the same thing as the block just above the `savercp` label. */ + /* See the comment there for more about this. */ + la t0, __OSGlobalIntMask + lw t0, (t0) + xor t0, t0, ~0 + andi t0, t0, SR_IMASK + or t1, t1, t0 + and k1, k1, ~SR_IMASK + or k1, k1, t1 + sw k1, THREAD_SR(a1) 2: - lui $k1, %hi(PHYS_TO_K1(MI_INTR_MASK_REG)) - lw $k1, %lo(PHYS_TO_K1(MI_INTR_MASK_REG))($k1) - beqz $k1, 3f - nop - // This code does the same thing as the block just below the `savercp` label. - // See the comment there for more about this. - lui $k0, %hi(__OSGlobalIntMask) - addiu $k0, %lo(__OSGlobalIntMask) - lw $k0, ($k0) - lw $t0, THREAD_RCP($a1) - li $at, ~0 - srl $k0, $k0, RCP_IMASKSHIFT - xor $k0, $k0, $at - andi $k0, $k0, (RCP_IMASK >> RCP_IMASKSHIFT) - and $k0, $k0, $t0 - or $k1, $k1, $k0 + lw k1, PHYS_TO_K1(MI_INTR_MASK_REG) + beqz k1, 3f + /* This code does the same thing as the block just below the `savercp` label. */ + /* See the comment there for more about this. 
*/ + la k0, __OSGlobalIntMask + lw k0, (k0) + lw t0, THREAD_RCP(a1) + srl k0, k0, RCP_IMASKSHIFT + xor k0, k0, ~0 + andi k0, k0, (RCP_IMASK >> RCP_IMASKSHIFT) + and k0, k0, t0 + or k1, k1, k0 3: - // If the specified thread queue is null, skip - // straight to dispatching - beqz $a0, no_enqueue - sw $k1, THREAD_RCP($a1) + /* If the specified thread queue is null, skip */ + /* straight to dispatching */ + sw k1, THREAD_RCP(a1) + beqz a0, no_enqueue jal __osEnqueueThread - nop no_enqueue: j __osDispatchThread - nop END(__osEnqueueAndYield) /** @@ -816,31 +717,27 @@ END(__osEnqueueAndYield) * Enqueues `thread` to the thread queue `threadQueue`, inserted by priority */ LEAF(__osEnqueueThread) - lw $t8, ($a0) - lw $t7, THREAD_PRI($a1) - move $t9, $a0 - lw $t6, THREAD_PRI($t8) - slt $at, $t6, $t7 - // If the current highest priority thread is a lower priority than - // the new thread, skip searching the queue - bnez $at, 2f - nop + lw t8, (a0) + lw t7, THREAD_PRI(a1) + move t9, a0 + lw t6, THREAD_PRI(t8) + /* If the current highest priority thread is a lower priority than */ + /* the new thread, skip searching the queue */ + blt t6, t7, 2f 1: - // Search the queue for the position to insert the thread to maintain - // ordering by priority - move $t9, $t8 - lw $t8, THREAD_NEXT($t8) - lw $t6, THREAD_PRI($t8) - slt $at, $t6, $t7 - beqz $at, 1b - nop + /* Search the queue for the position to insert the thread to maintain */ + /* ordering by priority */ + move t9, t8 + lw t8, THREAD_NEXT(t8) + lw t6, THREAD_PRI(t8) + bge t6, t7, 1b 2: - // Insert the thread into the queue - lw $t8, ($t9) - sw $t8, THREAD_NEXT($a1) - sw $a1, ($t9) - jr $ra - sw $a0, THREAD_QUEUE($a1) + /* Insert the thread into the queue */ + lw t8, (t9) + sw t8, THREAD_NEXT(a1) + sw a1, (t9) + sw a0, THREAD_QUEUE(a1) + jr ra END(__osEnqueueThread) /** @@ -850,16 +747,15 @@ END(__osEnqueueThread) * thread queue `threadQueue` and returns it */ LEAF(__osPopThread) - lw $v0, ($a0) - lw $t9, THREAD_NEXT($v0) - jr 
$ra - sw $t9, ($a0) + lw v0, (a0) + lw t9, THREAD_NEXT(v0) + sw t9, (a0) + jr ra END(__osPopThread) #if LIBULTRA_VERSION >= LIBULTRA_VERSION_K LEAF(__osNop) - jr $ra - nop + jr ra END(__osNop) #endif @@ -869,113 +765,111 @@ END(__osNop) * Dispatches the next thread to run after restoring the context */ LEAF(__osDispatchThread) - // Obtain highest priority thread from the active run queue - lui $a0, %hi(__osRunQueue) + /* Obtain highest priority thread from the active run queue */ + la a0, __osRunQueue jal __osPopThread - addiu $a0, $a0, %lo(__osRunQueue) - // Set thread as running - lui $at, %hi(__osRunningThread) - sw $v0, %lo(__osRunningThread)($at) - li $t0, OS_STATE_RUNNING - sh $t0, THREAD_STATE($v0) - // Restore SR, masking out any interrupts that are not also - // enabled in the global interrupt mask - move $k0, $v0 - lui $t0, %hi(__OSGlobalIntMask) - lw $k1, THREAD_SR($k0) - addiu $t0, %lo(__OSGlobalIntMask) - lw $t0, ($t0) - lui $at, ((~SR_IMASK) >> 0x10) & 0xFFFF - andi $t1, $k1, SR_IMASK - ori $at, (~SR_IMASK) & 0xFFFF - andi $t0, $t0, SR_IMASK - and $t1, $t1, $t0 - and $k1, $k1, $at - or $k1, $k1, $t1 - mtc0 $k1, C0_SR - // Restore GPRs - ld $k1, THREAD_LO($k0) - ld $at, THREAD_AT($k0) - ld $v0, THREAD_V0($k0) - mtlo $k1 - ld $k1, THREAD_HI($k0) - ld $v1, THREAD_V1($k0) - ld $a0, THREAD_A0($k0) - ld $a1, THREAD_A1($k0) - ld $a2, THREAD_A2($k0) - ld $a3, THREAD_A3($k0) - ld $t0, THREAD_T0($k0) - ld $t1, THREAD_T1($k0) - ld $t2, THREAD_T2($k0) - ld $t3, THREAD_T3($k0) - ld $t4, THREAD_T4($k0) - ld $t5, THREAD_T5($k0) - ld $t6, THREAD_T6($k0) - ld $t7, THREAD_T7($k0) - ld $s0, THREAD_S0($k0) - ld $s1, THREAD_S1($k0) - ld $s2, THREAD_S2($k0) - ld $s3, THREAD_S3($k0) - ld $s4, THREAD_S4($k0) - ld $s5, THREAD_S5($k0) - ld $s6, THREAD_S6($k0) - ld $s7, THREAD_S7($k0) - ld $t8, THREAD_T8($k0) - ld $t9, THREAD_T9($k0) - ld $gp, THREAD_GP($k0) - mthi $k1 - ld $sp, THREAD_SP($k0) - ld $fp, THREAD_S8($k0) - ld $ra, THREAD_RA($k0) - // Move thread pc to EPC so that 
eret will return execution to where the thread left off - lw $k1, THREAD_PC($k0) - mtc0 $k1, C0_EPC - // Check if the FPU was used by this thread and if so also restore the FPU registers - lw $k1, THREAD_FP($k0) - beqz $k1, 1f - nop - lw $k1, THREAD_FPCSR($k0) - ctc1 $k1, C1_FPCSR - ldc1 $f0, THREAD_FP0($k0) - ldc1 $f2, THREAD_FP2($k0) - ldc1 $f4, THREAD_FP4($k0) - ldc1 $f6, THREAD_FP6($k0) - ldc1 $f8, THREAD_FP8($k0) - ldc1 $f10, THREAD_FP10($k0) - ldc1 $f12, THREAD_FP12($k0) - ldc1 $f14, THREAD_FP14($k0) - ldc1 $f16, THREAD_FP16($k0) - ldc1 $f18, THREAD_FP18($k0) - ldc1 $f20, THREAD_FP20($k0) - ldc1 $f22, THREAD_FP22($k0) - ldc1 $f24, THREAD_FP24($k0) - ldc1 $f26, THREAD_FP26($k0) - ldc1 $f28, THREAD_FP28($k0) - ldc1 $f30, THREAD_FP30($k0) + /* Set thread as running */ + sw v0, __osRunningThread + li t0, OS_STATE_RUNNING + sh t0, THREAD_STATE(v0) + /* Restore SR, masking out any interrupts that are not also */ + /* enabled in the global interrupt mask */ + move k0, v0 + lw k1, THREAD_SR(k0) + la t0, __OSGlobalIntMask + lw t0, (t0) + andi t1, k1, SR_IMASK + andi t0, t0, SR_IMASK + and t1, t1, t0 + and k1, k1, ~SR_IMASK + or k1, k1, t1 + MTC0( k1, C0_SR) + /* Restore GPRs */ +.set noat + ld AT, THREAD_AT(k0) + ld v0, THREAD_V0(k0) + ld v1, THREAD_V1(k0) + ld a0, THREAD_A0(k0) + ld a1, THREAD_A1(k0) + ld a2, THREAD_A2(k0) + ld a3, THREAD_A3(k0) + ld t0, THREAD_T0(k0) + ld t1, THREAD_T1(k0) + ld t2, THREAD_T2(k0) + ld t3, THREAD_T3(k0) + ld t4, THREAD_T4(k0) + ld t5, THREAD_T5(k0) + ld t6, THREAD_T6(k0) + ld t7, THREAD_T7(k0) + ld s0, THREAD_S0(k0) + ld s1, THREAD_S1(k0) + ld s2, THREAD_S2(k0) + ld s3, THREAD_S3(k0) + ld s4, THREAD_S4(k0) + ld s5, THREAD_S5(k0) + ld s6, THREAD_S6(k0) + ld s7, THREAD_S7(k0) + ld t8, THREAD_T8(k0) + ld t9, THREAD_T9(k0) + ld gp, THREAD_GP(k0) + ld k1, THREAD_LO(k0) + mtlo k1 + ld k1, THREAD_HI(k0) + mthi k1 + ld sp, THREAD_SP(k0) + ld fp, THREAD_S8(k0) + ld ra, THREAD_RA(k0) + /* Move thread pc to EPC so that eret will return execution 
to where the thread left off */ + lw k1, THREAD_PC(k0) + MTC0( k1, C0_EPC) + /* Check if the FPU was used by this thread and if so also restore the FPU registers */ + lw k1, THREAD_FP(k0) + beqz k1, 1f + +.set noreorder + lw k1, THREAD_FPCSR(k0) + ctc1 k1, C1_FPCSR +.set reorder + ldc1 $f0, THREAD_FP0(k0) + ldc1 $f2, THREAD_FP2(k0) + ldc1 $f4, THREAD_FP4(k0) + ldc1 $f6, THREAD_FP6(k0) + ldc1 $f8, THREAD_FP8(k0) + ldc1 $f10, THREAD_FP10(k0) + ldc1 $f12, THREAD_FP12(k0) + ldc1 $f14, THREAD_FP14(k0) + ldc1 $f16, THREAD_FP16(k0) + ldc1 $f18, THREAD_FP18(k0) + ldc1 $f20, THREAD_FP20(k0) + ldc1 $f22, THREAD_FP22(k0) + ldc1 $f24, THREAD_FP24(k0) + ldc1 $f26, THREAD_FP26(k0) + ldc1 $f28, THREAD_FP28(k0) + ldc1 $f30, THREAD_FP30(k0) 1: - // Restore RCP interrupt mask, masking out any RCP interrupts that - // are not also enabled in the global interrupt mask - lw $k1, THREAD_RCP($k0) - lui $k0, %hi(__OSGlobalIntMask) - addiu $k0, %lo(__OSGlobalIntMask) - lw $k0, ($k0) - srl $k0, $k0, RCP_IMASKSHIFT - and $k1, $k1, $k0 - sll $k1, $k1, 1 - lui $k0, %hi(__osRcpImTable) - addiu $k0, %lo(__osRcpImTable) - addu $k1, $k1, $k0 - lhu $k1, ($k1) - lui $k0, %hi(PHYS_TO_K1(MI_INTR_MASK_REG)) - addiu $k0, %lo(PHYS_TO_K1(MI_INTR_MASK_REG)) - sw $k1, ($k0) - // Empty pipeline + /* Restore RCP interrupt mask, masking out any RCP interrupts that */ + /* are not also enabled in the global interrupt mask */ +.set noreorder + lw k1, THREAD_RCP(k0) + la k0, __OSGlobalIntMask + lw k0, (k0) + srl k0, k0, RCP_IMASKSHIFT + and k1, k1, k0 + sll k1, k1, 1 + la k0, __osRcpImTable + addu k1, k1, k0 + lhu k1, (k1) + la k0, PHYS_TO_K1(MI_INTR_MASK_REG) + sw k1, (k0) + /* Empty pipeline */ nop nop nop nop - // Resume thread execution + /* Resume thread execution */ eret +.set reorder +.set at END(__osDispatchThread) /** @@ -986,7 +880,7 @@ END(__osDispatchThread) * current thread to be destroyed. 
*/ LEAF(__osCleanupThread) + move a0, zero jal osDestroyThread - move $a0, $zero - // Despite being a jal, this function does not return as the thread will have been destroyed + /* Despite being a jal, this function does not return as the thread will have been destroyed */ END(__osCleanupThread) diff --git a/src/libultra/os/getcause.s b/src/libultra/os/getcause.s index a621e97363..b59c5c6801 100644 --- a/src/libultra/os/getcause.s +++ b/src/libultra/os/getcause.s @@ -1,14 +1,10 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noreorder - -.section .text - -.balign 16 +.text LEAF(__osGetCause) - mfc0 $v0, C0_CAUSE - jr $ra - nop + MFC0( v0, C0_CAUSE) + jr ra END(__osGetCause) diff --git a/src/libultra/os/getcount.s b/src/libultra/os/getcount.s index 242c9da692..84552adb00 100644 --- a/src/libultra/os/getcount.s +++ b/src/libultra/os/getcount.s @@ -1,14 +1,10 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noreorder - -.section .text - -.balign 16 +.text LEAF(osGetCount) - mfc0 $v0, C0_COUNT - jr $ra - nop + MFC0( v0, C0_COUNT) + jr ra END(osGetCount) diff --git a/src/libultra/os/getfpccsr.s b/src/libultra/os/getfpccsr.s index 93d4e51b8e..37c0e8c3b2 100644 --- a/src/libultra/os/getfpccsr.s +++ b/src/libultra/os/getfpccsr.s @@ -1,14 +1,10 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noreorder - -.section .text - -.balign 16 +.text LEAF(__osGetFpcCsr) - cfc1 $v0, C1_FPCSR - jr $ra - nop + CFC1( v0, C1_FPCSR) + jr ra END(__osGetFpcCsr) diff --git a/src/libultra/os/getintmask.s b/src/libultra/os/getintmask.s index 2c6ddc86ed..f734103938 100644 --- a/src/libultra/os/getintmask.s +++ b/src/libultra/os/getintmask.s @@ -1,14 +1,10 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" #include "ultra64/rcp.h" #include "ultra64/exception.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text /** * OSIntMask 
osGetIntMask(void); @@ -23,38 +19,35 @@ * See the comment for osSetIntMask for more details. */ LEAF(osGetIntMask) - // Extract interrupt enable bits from current SR - mfc0 $v0, C0_SR - andi $v0, $v0, (SR_IMASK | SR_IE) - // Get value of __OSGlobalIntMask - lui $t0, %hi(__OSGlobalIntMask) - addiu $t0, %lo(__OSGlobalIntMask) - lw $t1, 0($t0) - // Bitwise-OR in the disabled CPU bits of __OSGlobalIntMask - li $at, ~0 - xor $t0, $t1, $at - andi $t0, $t0, SR_IMASK - or $v0, $v0, $t0 - // Fetch MI_INTR_MASK_REG - lui $t1, %hi(PHYS_TO_K1(MI_INTR_MASK_REG)) - lw $t1, %lo(PHYS_TO_K1(MI_INTR_MASK_REG))($t1) - // If there are RCP interrupts masked - beqz $t1, 1f - // Get value of __OSGlobalIntMask - lui $t0, %hi(__OSGlobalIntMask) - addiu $t0, %lo(__OSGlobalIntMask) - lw $t0, 0($t0) - // Bitwise-OR in the disabled RCP bits of __OSGlobalIntMask - srl $t0, $t0, RCP_IMASKSHIFT - li $at, ~0 - xor $t0, $t0, $at - andi $t0, $t0, (RCP_IMASK >> RCP_IMASKSHIFT) - or $t1, $t1, $t0 +.set noreorder + /* Extract interrupt enable bits from current SR */ + mfc0 v0, C0_SR + andi v0, v0, (SR_IMASK | SR_IE) + /* Get value of __OSGlobalIntMask */ + la t0, __OSGlobalIntMask + lw t1, (t0) + /* Bitwise-OR in the disabled CPU bits of __OSGlobalIntMask */ + xor t0, t1, ~0 + andi t0, t0, SR_IMASK + or v0, v0, t0 + /* Fetch MI_INTR_MASK_REG */ + lw t1, PHYS_TO_K1(MI_INTR_MASK_REG) + /* If there are RCP interrupts masked */ + beqz t1, 1f + /* Get value of __OSGlobalIntMask */ + la t0, __OSGlobalIntMask /* Note: macro expansion in delay slot */ + lw t0, (t0) + /* Bitwise-OR in the disabled RCP bits of __OSGlobalIntMask */ + srl t0, t0, RCP_IMASKSHIFT + xor t0, t0, ~0 + andi t0, t0, (RCP_IMASK >> RCP_IMASKSHIFT) + or t1, t1, t0 1: - // Shift the RCP bits to not conflict with the CPU bits - sll $t2, $t1, RCP_IMASKSHIFT - // OR the CPU and RCP bits together - or $v0, $v0, $t2 - jr $ra + /* Shift the RCP bits to not conflict with the CPU bits */ + sll t2, t1, RCP_IMASKSHIFT + /* OR the CPU and RCP bits 
together */ + or v0, v0, t2 + jr ra nop +.set reorder END(osGetIntMask) diff --git a/src/libultra/os/getsr.s b/src/libultra/os/getsr.s index dc6901b982..709075a7b7 100644 --- a/src/libultra/os/getsr.s +++ b/src/libultra/os/getsr.s @@ -1,14 +1,10 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noreorder - -.section .text - -.balign 16 +.text LEAF(__osGetSR) - mfc0 $v0, C0_SR - jr $ra - nop + MFC0( v0, C0_SR) + jr ra END(__osGetSR) diff --git a/src/libultra/os/interrupt.s b/src/libultra/os/interrupt.s index 829377bb0e..3a9102c579 100644 --- a/src/libultra/os/interrupt.s +++ b/src/libultra/os/interrupt.s @@ -1,53 +1,45 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" #include "ultra64/thread.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text LEAF(__osDisableInt) - lui $t2, %hi(__OSGlobalIntMask) - addiu $t2, $t2, %lo(__OSGlobalIntMask) - lw $t3, ($t2) - andi $t3, $t3, SR_IMASK - mfc0 $t0, C0_SR - li $at, ~SR_IE - and $t1, $t0, $at - mtc0 $t1, C0_SR - andi $v0, $t0, SR_IE - lw $t0, ($t2) - andi $t0, $t0, SR_IMASK - beq $t0, $t3, No_Change_Global_Int - lui $t2, %hi(__osRunningThread) - //! @bug this addiu should be lw, it may never come up in practice as to reach this code - //! the CPU bits of __OSGlobalIntMask must have changed while this function is running. - addiu $t2, $t2, %lo(__osRunningThread) - lw $t1, THREAD_SR($t2) - andi $t2, $t1, SR_IMASK - and $t2, $t2, $t0 - li $at, ~SR_IMASK - and $t1, $t1, $at - or $t1, $t1, $t2 - li $at, ~SR_IE - and $t1, $t1, $at - mtc0 $t1, C0_SR - nop - nop + la t2, __OSGlobalIntMask + lw t3, (t2) + and t3, t3, SR_IMASK + MFC0( t0, C0_SR) + and t1, t0, ~SR_IE + MTC0( t1, C0_SR) + and v0, t0, SR_IE + lw t0, (t2) + and t0, t0, SR_IMASK +.set noreorder + beq t0, t3, No_Change_Global_Int + /*! @bug this la should be lw, it may never come up in practice as to reach this code + *! 
the CPU bits of __OSGlobalIntMask must have changed while this function is running. + */ + la t2, __osRunningThread + lw t1, THREAD_SR(t2) + and t2, t1, SR_IMASK + and t2, t2, t0 +.set reorder + and t1, t1, ~SR_IMASK + or t1, t1, t2 + and t1, t1, ~SR_IE + MTC0( t1, C0_SR) + NOP + NOP No_Change_Global_Int: - jr $ra - nop + jr ra END(__osDisableInt) LEAF(__osRestoreInt) - mfc0 $t0, C0_SR - or $t0, $t0, $a0 - mtc0 $t0, C0_SR - nop - nop - jr $ra - nop + MFC0( t0, C0_SR) + or t0, t0, a0 + MTC0( t0, C0_SR) + NOP + NOP + jr ra END(__osRestoreInt) diff --git a/src/libultra/os/invaldcache.s b/src/libultra/os/invaldcache.s index 9464a24da6..22109e13aa 100644 --- a/src/libultra/os/invaldcache.s +++ b/src/libultra/os/invaldcache.s @@ -1,12 +1,8 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text /** * void osInvalDCache(void* vaddr, s32 nbytes); @@ -24,71 +20,67 @@ * the entire data cache is invalidated. 
*/ LEAF(osInvalDCache) - // If the amount to invalidate is less than or equal to 0, return immediately - blez $a1, 3f +.set noreorder + /* If the amount to invalidate is less than or equal to 0, return immediately */ + blez a1, 3f nop - // If the amount to invalidate is as large as or larger than - // the data cache size, invalidate all - li $t3, DCACHE_SIZE - sltu $at, $a1, $t3 - beqz $at, 4f + /* If the amount to invalidate is as large as or larger than + * the data cache size, invalidate all */ + li t3, DCACHE_SIZE + bgeu a1, t3, 4f nop - // Ensure end address doesn't wrap around and end up smaller - // than the start address - move $t0, $a0 - addu $t1, $a0, $a1 - sltu $at, $t0, $t1 - beqz $at, 3f + /* Ensure end address doesn't wrap around and end up smaller + * than the start address */ + move t0, a0 + addu t1, a0, a1 + bgeu t0, t1, 3f nop - // Mask start with cache line - andi $t2, $t0, DCACHE_LINEMASK - // If mask is not zero, the start is not cache aligned - beqz $t2, 1f - addiu $t1, $t1, -DCACHE_LINESIZE - // Subtract mask result to align to cache line - subu $t0, $t0, $t2 - // Hit-Writeback-Invalidate unaligned part - cache (CACH_PD | C_HWBINV), ($t0) - sltu $at, $t0, $t1 - // If that's all there is to do, return early - beqz $at, 3f + /* Mask start with cache line */ + andi t2, t0, DCACHE_LINEMASK + /* If mask is not zero, the start is not cache aligned */ + beqz t2, 1f + addiu t1, t1, -DCACHE_LINESIZE + /* Subtract mask result to align to cache line */ + subu t0, t0, t2 + /* Hit-Writeback-Invalidate unaligned part */ + cache (CACH_PD | C_HWBINV), (t0) + /* If that's all there is to do, return early */ + bgeu t0, t1, 3f nop - addiu $t0, $t0, DCACHE_LINESIZE + addiu t0, t0, DCACHE_LINESIZE 1: - // Mask end with cache line - andi $t2, $t1, DCACHE_LINEMASK - // If mask is not zero, the end is not cache aligned - beqz $t2, 1f + /* Mask end with cache line */ + andi t2, t1, DCACHE_LINEMASK + /* If mask is not zero, the end is not cache aligned */ + beqz t2, 
1f nop - // Subtract mask result to align to cache line - subu $t1, $t1, $t2 - // Hit-Writeback-Invalidate unaligned part - cache (CACH_PD | C_HWBINV), DCACHE_LINESIZE($t1) - sltu $at, $t1, $t0 - // If that's all there is to do, return early - bnez $at, 3f + /* Subtract mask result to align to cache line */ + subu t1, t1, t2 + /* Hit-Writeback-Invalidate unaligned part */ + cache (CACH_PD | C_HWBINV), DCACHE_LINESIZE(t1) + /* If that's all there is to do, return early */ + bltu t1, t0, 3f nop - // Invalidate the rest + /* Invalidate the rest */ 1: - // Hit-Invalidate - cache (CACH_PD | C_HINV), ($t0) - sltu $at, $t0, $t1 - bnez $at, 1b - addiu $t0, $t0, DCACHE_LINESIZE + /* Hit-Invalidate */ + cache (CACH_PD | C_HINV), (t0) + bltu t0, t1, 1b + addiu t0, t0, DCACHE_LINESIZE 3: - jr $ra + jr ra nop 4: - li $t0, K0BASE - addu $t1, $t0, $t3 - addiu $t1, $t1, -DCACHE_LINESIZE + li t0, K0BASE + addu t1, t0, t3 + addiu t1, t1, -DCACHE_LINESIZE 5: - // Index-Writeback-Invalidate - cache (CACH_PD | C_IWBINV), ($t0) - sltu $at, $t0, $t1 - bnez $at, 5b - addiu $t0, DCACHE_LINESIZE - jr $ra + /* Index-Writeback-Invalidate */ + cache (CACH_PD | C_IWBINV), (t0) + bltu t0, t1, 5b + addiu t0, DCACHE_LINESIZE + jr ra nop +.set reorder END(osInvalDCache) diff --git a/src/libultra/os/invalicache.s b/src/libultra/os/invalicache.s index 55f8312380..757215cf19 100644 --- a/src/libultra/os/invalicache.s +++ b/src/libultra/os/invalicache.s @@ -1,52 +1,46 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text LEAF(osInvalICache) - // If the amount to invalidate is less than or equal to 0, return immediately - blez $a1, 2f +.set noreorder + /* If the amount to invalidate is less than or equal to 0, return immediately */ + blez a1, 2f nop - // If the amount to invalidate is as large as or larger than - // the instruction cache size, invalidate all - li $t3, ICACHE_SIZE - sltu $at, $a1, $t3 - beqz 
$at, 3f + /* If the amount to invalidate is as large as or larger than */ + /* the instruction cache size, invalidate all */ + li t3, ICACHE_SIZE + bgeu a1, t3, 3f nop - // ensure end address doesn't wrap around and end up smaller - // than the start address - move $t0, $a0 - addu $t1, $a0, $a1 - sltu $at, $t0, $t1 - beqz $at, 2f + /* ensure end address doesn't wrap around and end up smaller */ + /* than the start address */ + move t0, a0 + addu t1, a0, a1 + bgeu t0, t1, 2f nop - // Mask and subtract to align to cache line - andi $t2, $t0, ICACHE_LINEMASK - addiu $t1, $t1, -ICACHE_LINESIZE - subu $t0, $t0, $t2 + /* Mask and subtract to align to cache line */ + andi t2, t0, ICACHE_LINEMASK + addiu t1, t1, -ICACHE_LINESIZE + subu t0, t0, t2 1: - cache (CACH_PI | C_HINV), ($t0) - sltu $at, $t0, $t1 - bnez $at, 1b - addiu $t0, $t0, ICACHE_LINESIZE + cache (CACH_PI | C_HINV), (t0) + bltu t0, t1, 1b + addiu t0, t0, ICACHE_LINESIZE 2: - jr $ra + jr ra nop 3: - li $t0, K0BASE - addu $t1, $t0, $t3 - addiu $t1, $t1, -ICACHE_LINESIZE + li t0, K0BASE + addu t1, t0, t3 + addiu t1, t1, -ICACHE_LINESIZE 4: - cache (CACH_PI | C_IINV), ($t0) - sltu $at, $t0, $t1 - bnez $at, 4b - addiu $t0, ICACHE_LINESIZE - jr $ra + cache (CACH_PI | C_IINV), (t0) + bltu t0, t1, 4b + addiu t0, ICACHE_LINESIZE + jr ra nop +.set reorder END(osInvalICache) diff --git a/src/libultra/os/maptlbrdb.s b/src/libultra/os/maptlbrdb.s index c3ac20948b..43e7cc93a3 100644 --- a/src/libultra/os/maptlbrdb.s +++ b/src/libultra/os/maptlbrdb.s @@ -1,36 +1,31 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" #include "ultra64/rdb.h" -.set noreorder - -.section .text - -.balign 16 +.text LEAF(osMapTLBRdb) - mfc0 $t0, C0_ENTRYHI - li $t1, NTLBENTRIES - mtc0 $t1, C0_INX - mtc0 $zero, C0_PAGEMASK - li $t2, (TLBLO_UNCACHED | TLBLO_D | TLBLO_V | TLBLO_G) - li $t1, (RDB_BASE_REG & TLBHI_VPN2MASK) - mtc0 $t1, C0_ENTRYHI - // Possible bug? 
Virtual address instead of physical address - // set as page frame number - li $t1, RDB_BASE_VIRTUAL_ADDR - srl $t3, $t1, TLBLO_PFNSHIFT - or $t3, $t3, $t2 - mtc0 $t3, C0_ENTRYLO0 - li $t1, TLBLO_G - mtc0 $t1, C0_ENTRYLO1 - nop - tlbwi - nop - nop - nop - nop - mtc0 $t0, C0_ENTRYHI - jr $ra - nop + MFC0( t0, C0_ENTRYHI) + li t1, NTLBENTRIES + MTC0( t1, C0_INX) + MTC0( zero, C0_PAGEMASK) + li t2, (TLBLO_UNCACHED | TLBLO_D | TLBLO_V | TLBLO_G) + li t1, (RDB_BASE_REG & TLBHI_VPN2MASK) + MTC0( t1, C0_ENTRYHI) + /* Possible bug? Virtual address instead of physical address set as page frame number */ + li t1, RDB_BASE_VIRTUAL_ADDR + srl t3, t1, TLBLO_PFNSHIFT + or t3, t3, t2 + MTC0( t3, C0_ENTRYLO0) + li t1, TLBLO_G + MTC0( t1, C0_ENTRYLO1) + NOP + TLBWI + NOP + NOP + NOP + NOP + MTC0( t0, C0_ENTRYHI) + jr ra END(osMapTLBRdb) diff --git a/src/libultra/os/parameters.s b/src/libultra/os/parameters.s index c0020ddbed..b881148144 100644 --- a/src/libultra/os/parameters.s +++ b/src/libultra/os/parameters.s @@ -1,22 +1,29 @@ #include "ultra64/asm.h" -.section .text +.text -.macro IPL_SYMBOL name, address, size - .global \name - .set \name, \address - .type \name, @object - .size \name, \size -.endm +#ifdef __sgi +#define IPL_SYMBOL(name, address, size) \ + ABS(name, address) +#else +#define IPL_SYMBOL(name, address, sz) \ + ABS(name, address) ;\ + .type name, @object ;\ + .size name, sz +#endif -IPL_SYMBOL leoBootID, 0x800001A0, 4 -IPL_SYMBOL osTvType, 0x80000300, 4 -IPL_SYMBOL osRomType, 0x80000304, 4 -IPL_SYMBOL osRomBase, 0x80000308, 4 -IPL_SYMBOL osResetType, 0x8000030C, 4 -IPL_SYMBOL osCicId, 0x80000310, 4 -IPL_SYMBOL osVersion, 0x80000314, 4 -IPL_SYMBOL osMemSize, 0x80000318, 4 -IPL_SYMBOL osAppNMIBuffer, 0x8000031C, 0x40 -.fill 0x60 +IPL_SYMBOL(leoBootID, 0x800001A0, 4) +IPL_SYMBOL(osTvType, 0x80000300, 4) +IPL_SYMBOL(osRomType, 0x80000304, 4) +IPL_SYMBOL(osRomBase, 0x80000308, 4) +IPL_SYMBOL(osResetType, 0x8000030C, 4) +IPL_SYMBOL(osCicId, 0x80000310, 4) 
+IPL_SYMBOL(osVersion, 0x80000314, 4) +IPL_SYMBOL(osMemSize, 0x80000318, 4) +IPL_SYMBOL(osAppNMIBuffer, 0x8000031C, 0x40) + + +.repeat 0x60/4 + NOP +.endr diff --git a/src/libultra/os/probetlb.s b/src/libultra/os/probetlb.s index 7bc7856e13..4fa8abc38e 100644 --- a/src/libultra/os/probetlb.s +++ b/src/libultra/os/probetlb.s @@ -1,12 +1,8 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text /** * u32 __osProbeTLB(void* vaddr); @@ -17,70 +13,71 @@ * Returns the physical address if found, or -1 if not found. */ LEAF(__osProbeTLB) - // Set C0_ENTRYHI based on supplied vaddr - mfc0 $t0, C0_ENTRYHI - andi $t1, $t0, TLBHI_PIDMASK - li $at, TLBHI_VPN2MASK - and $t2, $a0, $at - or $t1, $t1, $t2 - mtc0 $t1, C0_ENTRYHI +.set noreorder + /* Set C0_ENTRYHI based on supplied vaddr */ + mfc0 t0, C0_ENTRYHI + and t1, t0, TLBHI_PIDMASK + and t2, a0, TLBHI_VPN2MASK + or t1, t1, t2 + mtc0 t1, C0_ENTRYHI nop nop nop - // TLB probe, sets C0_INX to a value matching C0_ENTRYHI. - // If no match is found the TLBINX_PROBE bit is set to indicate this. + /* TLB probe, sets C0_INX to a value matching C0_ENTRYHI. */ + /* If no match is found the TLBINX_PROBE bit is set to indicate this. 
*/ tlbp nop nop - // Read result - mfc0 $t3, C0_INX - li $at, TLBINX_PROBE - and $t3, $t3, $at - // Branch if no match was found - bnez $t3, 3f + /* Read result */ + mfc0 t3, C0_INX + and t3, t3, TLBINX_PROBE + /* Branch if no match was found */ + bnez t3, 3f nop - // Read TLB, sets C0_ENTRYHI, C0_ENTRYLO0, C0_ENTRYLO1 and C0_PAGEMASK for the TLB - // entry indicated by C0_INX + + /* Read TLB, sets C0_ENTRYHI, C0_ENTRYLO0, C0_ENTRYLO1 and C0_PAGEMASK for the TLB */ + /* entry indicated by C0_INX */ tlbr nop nop nop - // Calculate page size = (page mask + 0x2000) >> 1 - mfc0 $t3, C0_PAGEMASK - addi $t3, $t3, 0x2000 - srl $t3, $t3, 1 - // & with vaddr - and $t4, $t3, $a0 - // Select C0_ENTRYLO0 or C0_ENTRYLO1 - bnez $t4, 1f - addi $t3, $t3, -1 // make bitmask out of page size - mfc0 $v0, C0_ENTRYLO0 + /* Calculate page size = (page mask + 0x2000) >> 1 */ + mfc0 t3, C0_PAGEMASK + add t3, t3, 0x2000 + srl t3, t3, 1 + /* & with vaddr */ + and t4, t3, a0 + /* Select C0_ENTRYLO0 or C0_ENTRYLO1 */ + bnez t4, 1f + add t3, t3, -1 /* make bitmask out of page size */ + mfc0 v0, C0_ENTRYLO0 b 2f nop 1: - mfc0 $v0, C0_ENTRYLO1 + mfc0 v0, C0_ENTRYLO1 2: - // Check valid bit and branch if not valid - andi $t5, $v0, TLBLO_V - beqz $t5, 3f + /* Check valid bit and branch if not valid */ + and t5, v0, TLBLO_V + beqz t5, 3f nop - // Extract the Page Frame Number from the entry - li $at, TLBLO_PFNMASK - and $v0, $v0, $at - sll $v0, $v0, TLBLO_PFNSHIFT - // Mask vaddr with page size mask - and $t5, $a0, $t3 - // Add masked vaddr to pfn to obtain the physical address - add $v0, $v0, $t5 + + /* Extract the Page Frame Number from the entry */ + and v0, v0, TLBLO_PFNMASK + sll v0, v0, TLBLO_PFNSHIFT + /* Mask vaddr with page size mask */ + and t5, a0, t3 + /* Add masked vaddr to pfn to obtain the physical address */ + add v0, v0, t5 b 4f nop 3: - // No physical address for the supplied virtual address was found, - // return -1 - li $v0, -1 + /* No physical address for the supplied virtual 
address was found, */ + /* return -1 */ + li v0, -1 4: - // Restore original C0_ENTRYHI value before returning - mtc0 $t0, C0_ENTRYHI - jr $ra + /* Restore original C0_ENTRYHI value before returning */ + mtc0 t0, C0_ENTRYHI + jr ra nop +.set reorder END(__osProbeTLB) diff --git a/src/libultra/os/setcompare.s b/src/libultra/os/setcompare.s index bd16533774..3c175c4943 100644 --- a/src/libultra/os/setcompare.s +++ b/src/libultra/os/setcompare.s @@ -1,14 +1,10 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noreorder - -.section .text - -.balign 16 +.text LEAF(__osSetCompare) - mtc0 $a0, C0_COMPARE - jr $ra - nop + MTC0( a0, C0_COMPARE) + jr ra END(__osSetCompare) diff --git a/src/libultra/os/setfpccsr.s b/src/libultra/os/setfpccsr.s index 1aa0e7d298..e333550844 100644 --- a/src/libultra/os/setfpccsr.s +++ b/src/libultra/os/setfpccsr.s @@ -1,15 +1,11 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noreorder - -.section .text - -.balign 16 +.text LEAF(__osSetFpcCsr) - cfc1 $v0, C1_FPCSR - ctc1 $a0, C1_FPCSR - jr $ra - nop + CFC1( v0, C1_FPCSR) + CTC1( a0, C1_FPCSR) + jr ra END(__osSetFpcCsr) diff --git a/src/libultra/os/setintmask.s b/src/libultra/os/setintmask.s index e48c1b376f..bf6bec020b 100644 --- a/src/libultra/os/setintmask.s +++ b/src/libultra/os/setintmask.s @@ -1,14 +1,11 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" #include "ultra64/rcp.h" #include "ultra64/exception.h" -.set noat -.set noreorder - -.section .rodata - -.balign 16 +.rdata +.align 2 /** * LUT to convert between an interrupt mask value and a value for MI_INTR_MASK_REG. 
@@ -82,9 +79,7 @@ DATA(__osRcpImTable) .half MI_INTR_MASK_SET_SP | MI_INTR_MASK_SET_SI | MI_INTR_MASK_SET_AI | MI_INTR_MASK_SET_VI | MI_INTR_MASK_SET_PI | MI_INTR_MASK_SET_DP ENDDATA(__osRcpImTable) -.section .text - -.balign 16 +.text /** * OSIntMask osSetIntMask(OSIntMask); @@ -113,56 +108,47 @@ ENDDATA(__osRcpImTable) * OS_IM_ALL, so the operation is usually simply (SR | 0). */ LEAF(osSetIntMask) - // Extract interrupt enable bits from current SR - mfc0 $t4, C0_SR - andi $v0, $t4, (SR_IMASK | SR_IE) - // Get value of __OSGlobalIntMask - lui $t0, %hi(__OSGlobalIntMask) - addiu $t0, %lo(__OSGlobalIntMask) - lw $t3, ($t0) - // Bitwise-OR in the disabled CPU bits of __OSGlobalIntMask - li $at, ~0 - xor $t0, $t3, $at - andi $t0, $t0, SR_IMASK - or $v0, $v0, $t0 - // Fetch MI_INTR_MASK_REG - lui $t2, %hi(PHYS_TO_K1(MI_INTR_MASK_REG)) - lw $t2, %lo(PHYS_TO_K1(MI_INTR_MASK_REG))($t2) - // If there are RCP interrupts masked - beqz $t2, 1f - srl $t1, $t3, RCP_IMASKSHIFT - // Bitwise-OR in the disabled RCP bits of __OSGlobalIntMask - li $at, ~0 - xor $t1, $t1, $at - andi $t1, $t1, (RCP_IMASK >> RCP_IMASKSHIFT) - or $t2, $t2, $t1 + /* Extract interrupt enable bits from current SR */ + MFC0( t4, C0_SR) +.set noreorder + and v0, t4, (SR_IMASK | SR_IE) + /* Get value of __OSGlobalIntMask */ + la t0, __OSGlobalIntMask + lw t3, (t0) + /* Bitwise-OR in the disabled CPU bits of __OSGlobalIntMask */ + xor t0, t3, ~0 + and t0, t0, SR_IMASK + or v0, v0, t0 + /* Fetch MI_INTR_MASK_REG */ + lw t2, PHYS_TO_K1(MI_INTR_MASK_REG) + /* If there are RCP interrupts masked */ + beqz t2, 1f + srl t1, t3, RCP_IMASKSHIFT + /* Bitwise-OR in the disabled RCP bits of __OSGlobalIntMask */ + xor t1, t1, ~0 + and t1, t1, (RCP_IMASK >> RCP_IMASKSHIFT) + or t2, t2, t1 1: - // Shift the RCP bits to not conflict with the CPU bits - sll $t2, $t2, RCP_IMASKSHIFT - // OR the CPU and RCP bits together - or $v0, $v0, $t2 - // Extract RCP interrupt enable bits from requested mask and mask with __OSGlobalIntMask 
- li $at, RCP_IMASK - and $t0, $a0, $at - and $t0, $t0, $t3 - // Convert to a value for MI_INTR_MASK_REG and set it - srl $t0, $t0, (RCP_IMASKSHIFT-1) - lui $t2, %hi(__osRcpImTable) - addu $t2, $t2, $t0 - lhu $t2, %lo(__osRcpImTable)($t2) - lui $at, %hi(PHYS_TO_K1(MI_INTR_MASK_REG)) - sw $t2, %lo(PHYS_TO_K1(MI_INTR_MASK_REG))($at) - // Extract CPU interrupt enable bits from requested mask and mask with __OSGlobalIntMask - andi $t0, $a0, OS_IM_CPU - andi $t1, $t3, SR_IMASK - and $t0, $t0, $t1 - li $at, ~SR_IMASK - and $t4, $t4, $at - // Bitwise OR in the remaining bits of SR and set new SR - or $t4, $t4, $t0 - mtc0 $t4, C0_SR - nop - nop - jr $ra - nop + /* Shift the RCP bits to not conflict with the CPU bits */ + sll t2, t2, RCP_IMASKSHIFT + /* OR the CPU and RCP bits together */ + or v0, v0, t2 + /* Extract RCP interrupt enable bits from requested mask and mask with __OSGlobalIntMask */ + and t0, a0, RCP_IMASK + and t0, t0, t3 + /* Convert to a value for MI_INTR_MASK_REG and set it */ + srl t0, t0, (RCP_IMASKSHIFT-1) + lhu t2, __osRcpImTable(t0) + sw t2, PHYS_TO_K1(MI_INTR_MASK_REG) + /* Extract CPU interrupt enable bits from requested mask and mask with __OSGlobalIntMask */ + and t0, a0, OS_IM_CPU + and t1, t3, SR_IMASK + and t0, t0, t1 + and t4, t4, ~SR_IMASK + /* Bitwise OR in the remaining bits of SR and set new SR */ + or t4, t4, t0 + MTC0( t4, C0_SR) + NOP + NOP + jr ra END(osSetIntMask) diff --git a/src/libultra/os/setsr.s b/src/libultra/os/setsr.s index b754359eae..fb7a1586d4 100644 --- a/src/libultra/os/setsr.s +++ b/src/libultra/os/setsr.s @@ -1,15 +1,11 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noreorder - -.section .text - -.balign 16 +.text LEAF(__osSetSR) - mtc0 $a0, C0_SR - nop - jr $ra - nop + MTC0( a0, C0_SR) + NOP + jr ra END(__osSetSR) diff --git a/src/libultra/os/setwatchlo.s b/src/libultra/os/setwatchlo.s index aee3ad4d6d..962513a480 100644 --- a/src/libultra/os/setwatchlo.s +++ 
b/src/libultra/os/setwatchlo.s @@ -1,15 +1,11 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noreorder - -.section .text - -.balign 16 +.text LEAF(__osSetWatchLo) - mtc0 $a0, C0_WATCHLO - nop - jr $ra - nop + MTC0( a0, C0_WATCHLO) + NOP + jr ra END(__osSetWatchLo) diff --git a/src/libultra/os/unmaptlball.s b/src/libultra/os/unmaptlball.s index e1a03b1a23..8cd79984ec 100644 --- a/src/libultra/os/unmaptlball.s +++ b/src/libultra/os/unmaptlball.s @@ -1,29 +1,25 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noreorder - -.section .text - -.balign 16 +.text LEAF(osUnmapTLBAll) - mfc0 $t0, C0_ENTRYHI - li $t1, (NTLBENTRIES - 1) - li $t2, (K0BASE & TLBHI_VPN2MASK) - mtc0 $t2, C0_ENTRYHI - mtc0 $zero, C0_ENTRYLO0 - mtc0 $zero, C0_ENTRYLO1 + MFC0( t0, C0_ENTRYHI) + li t1, (NTLBENTRIES - 1) + li t2, (K0BASE & TLBHI_VPN2MASK) + MTC0( t2, C0_ENTRYHI) + MTC0( zero, C0_ENTRYLO0) + MTC0( zero, C0_ENTRYLO1) 1: - mtc0 $t1, C0_INX - nop - tlbwi - nop - nop - addi $t1, $t1, -1 - bgez $t1, 1b - nop - mtc0 $t0, C0_ENTRYHI - jr $ra - nop + MTC0( t1, C0_INX) + NOP + TLBWI + NOP + NOP + addi t1, t1, -1 + bgez t1, 1b + + MTC0( t0, C0_ENTRYHI) + jr ra END(osUnmapTLBAll) diff --git a/src/libultra/os/writebackdcache.s b/src/libultra/os/writebackdcache.s index 829f6f4a05..c7207c7e56 100644 --- a/src/libultra/os/writebackdcache.s +++ b/src/libultra/os/writebackdcache.s @@ -1,12 +1,8 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text /** * void osWritebackDCache(void* vaddr, s32 nbytes); @@ -16,45 +12,43 @@ * written back. 
*/ LEAF(osWritebackDCache) - // If the amount to write back is less than or equal to 0, return immediately - blez $a1, .ret - nop - // If the amount to write back is as large as or larger than - // the data cache size, write back all - li $t3, DCACHE_SIZE - sltu $at, $a1, $t3 - beqz $at, .all - nop - // ensure end address doesn't wrap around and end up smaller - // than the start address - move $t0, $a0 - addu $t1, $a0, $a1 - sltu $at, $t0, $t1 - beqz $at, .ret - nop - // Mask and subtract to align to cache line - andi $t2, $t0, DCACHE_LINEMASK - addiu $t1, $t1, -DCACHE_LINESIZE - subu $t0, $t0, $t2 -1: - cache (CACH_PD | C_HWB), ($t0) - sltu $at, $t0, $t1 - bnez $at, 1b - addiu $t0, $t0, DCACHE_LINESIZE -.ret: - jr $ra - nop + /* If the amount to write back is less than or equal to 0, return immediately */ + blez a1, .ret -// same as osWritebackDCacheAll in operation -.all: - li $t0, K0BASE - addu $t1, $t0, $t3 - addiu $t1, $t1, -DCACHE_LINESIZE + /* If the amount to write back is as large as or larger than */ + /* the data cache size, write back all */ + li t3, DCACHE_SIZE + bgeu a1, t3, .all + + /* ensure end address doesn't wrap around and end up smaller */ + /* than the start address */ + move t0, a0 + addu t1, a0, a1 + bgeu t0, t1, .ret + + /* Mask and subtract to align to cache line */ + andi t2, t0, DCACHE_LINEMASK + addiu t1, t1, -DCACHE_LINESIZE + subu t0, t0, t2 1: - cache (CACH_PD | C_IWBINV), ($t0) - sltu $at, $t0, $t1 - bnez $at, 1b - addiu $t0, DCACHE_LINESIZE - jr $ra - nop +.set noreorder + cache (CACH_PD | C_HWB), (t0) + bltu t0, t1, 1b + addiu t0, t0, DCACHE_LINESIZE +.set reorder +.ret: + jr ra + +/* same as osWritebackDCacheAll in operation */ +.all: + li t0, K0BASE + addu t1, t0, t3 + addiu t1, t1, -DCACHE_LINESIZE +1: +.set noreorder + cache (CACH_PD | C_IWBINV), (t0) + bltu t0, t1, 1b + addiu t0, DCACHE_LINESIZE +.set reorder + jr ra END(osWritebackDCache) diff --git a/src/libultra/os/writebackdcacheall.s 
b/src/libultra/os/writebackdcacheall.s index 54a58dff8a..a15ef4554f 100644 --- a/src/libultra/os/writebackdcacheall.s +++ b/src/libultra/os/writebackdcacheall.s @@ -1,23 +1,19 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "ultra64/R4300.h" -.set noat -.set noreorder - -.section .text - -.balign 16 +.text LEAF(osWritebackDCacheAll) - li $t0, K0BASE - li $t2, DCACHE_SIZE - addu $t1, $t0, $t2 - addiu $t1, $t1, -DCACHE_LINESIZE + li t0, K0BASE + li t2, DCACHE_SIZE + addu t1, t0, t2 + addiu t1, t1, -DCACHE_LINESIZE 1: - cache (CACH_PD | C_IWBINV), ($t0) - sltu $at, $t0, $t1 - bnez $at, 1b - addiu $t0, DCACHE_LINESIZE - jr $ra - nop +.set noreorder + cache (CACH_PD | C_IWBINV), (t0) + bltu t0, t1, 1b + addiu t0, DCACHE_LINESIZE +.set reorder + jr ra END(osWritebackDCacheAll) diff --git a/src/makerom/entry.s b/src/makerom/entry.s index 6be493e6b6..0a1233d80e 100644 --- a/src/makerom/entry.s +++ b/src/makerom/entry.s @@ -1,34 +1,44 @@ #include "ultra64/asm.h" +#include "ultra64/regdef.h" #include "boot.h" -.set noreorder +.text -.section .text - -.balign 16 +#if defined(__sgi) && !defined(AVOID_UB) +/* IDO assembler workaround: The makerom tool in the N64 SDK was given the bss segment size as a const + * literal, and since this literal was < 0x10000 it was loaded in one instruction. We don't have access + * to the bss segment size until we link everything so we cannot do the same thing. Instead we must load + * only the lower 16 bits of the bss size for matching. + * When AVOID_UB is enabled, don't do this and instead load the full symbol value, otherwise not all of + * bss may be cleared. 
*/ +#define LOAD_BSS_SIZE(reg) li reg, %half(_bootSegmentBssSize) +#else +#define LOAD_BSS_SIZE(reg) la reg, _bootSegmentBssSize +#endif LEAF(entrypoint) - // Clear boot segment .bss - la $t0, _bootSegmentBssStart -#ifndef AVOID_UB - // UB: li only loads the lower 16 bits of _bootSegmentBssSize when it may be larger than this, - // so not all of bss may be cleared if it is too large - li $t1, _bootSegmentBssSize -#else - la $t1, _bootSegmentBssSize -#endif + /* Clear boot segment .bss */ + la t0, _bootSegmentBssStart + LOAD_BSS_SIZE(t1) .clear_bss: - addi $t1, $t1, -8 - sw $zero, ($t0) - sw $zero, 4($t0) - bnez $t1, .clear_bss - addi $t0, $t0, 8 - // Set up stack and enter program code - lui $t2, %hi(bootproc) - lui $sp, %hi(sBootThreadStack + BOOT_STACK_SIZE) - addiu $t2, %lo(bootproc) - jr $t2 - addiu $sp, %lo(sBootThreadStack + BOOT_STACK_SIZE) + sw zero, 0(t0) + sw zero, 4(t0) + addi t0, t0, 8 + addi t1, t1, -8 + bnez t1, .clear_bss + + /* Set up stack and enter program code */ + la sp, sBootThreadStack + BOOT_STACK_SIZE + la t2, bootproc + jr t2 END(entrypoint) +#ifdef __GNUC__ +/* Pad to a total size of 0x60 */ .fill 0x60 - (. - entrypoint) +#else +/* IDO can't take absolute differences of symbols.. */ +.repeat (0x60 - 0x34) + .byte 0 +.endr +#endif diff --git a/src/makerom/ipl3.s b/src/makerom/ipl3.s deleted file mode 100644 index 45f1c8e6f8..0000000000 --- a/src/makerom/ipl3.s +++ /dev/null @@ -1,4 +0,0 @@ - -.section .text - -.incbin "incbin/ipl3"