mirror of
https://github.com/zeldaret/oot.git
synced 2025-01-15 04:36:59 +00:00
7334ffa373
* Format all handwritten asm and document some
* Use C preprocessor for constants
* Fix
* Fix PI_STATUS_ERROR, some label improvements
* Avoid hi/lo for constants
* Some more comments
* Properly mark functions as functions and their sizes
* Fix merge
* Improvements
* Review suggestions, rework procedure start/end macros to be more like libreultra
* Move IPL3 symbol definitions into ipl3.s
* Fix undefined_syms, add include and language guards to asm.h and fix the comment in gbi.h
* Consistent hex capitalization, add some MIPS builtin defines to CC_CHECK to behave properly
* Add -no-pad-sections assembler option and clean up alignment in gu files and bzero
* Further suggestions and improvements
* Matrix conversion function clarifications
* Fix passing AVOID_UB to gcc
* Suggestions
* Suggestions, global interrupt mask improvements
* Further suggestions, interrupt mask comments
* Comments fixes, rdb.h
* Switch from # comments to // comments, remove unnecessary .set gp=64 directives
* Further review suggestions
* Missed one
233 lines
5.3 KiB
ArmAsm
233 lines
5.3 KiB
ArmAsm
#include "ultra64/asm.h"
|
|
|
|
// The code below names $at explicitly as a scratch register; noat stops the
// assembler from using $at itself (and from warning about the explicit uses).
.set noat
|
|
// Keep instructions exactly in the order written: the branch delay slots
// below are filled by hand, so the assembler must not reorder or pad them.
.set noreorder
|
|
|
|
// Emit what follows into the .text section.
.section .text
|
|
|
|
// Align the next emitted item to a 16-byte boundary.
.balign 16
|
|
|
|
// bcopy: copy $a2 bytes from the memory at $a0 to the memory at $a1.
//
// Inputs (as used by the code below):
//   $a0 = source address      (read with lb/lh/lw)
//   $a1 = destination address (written with sb/sh/sw)
//   $a2 = number of bytes to copy
// Output:
//   $v0 = the value $a1 had on entry (kept in $a3 for the whole routine)
// Registers written: $at, $v0, $v1, $a0-$a3, $t0-$t5.
//
// Direction: if dst == src or len == 0 nothing is copied. The copy runs
// backwards (from the end of both buffers) only when src < dst < src + len,
// so that overlapping bytes are read before they are overwritten; in every
// other case it runs forwards.
//
// Width: when len >= 0x10 and both pointers have the same low two address
// bits, the pointers are first brought to a 4-byte boundary with byte/halfword
// accesses and the bulk is moved in 32-, 16- and 4-byte word steps; whatever
// remains (and every short or mutually misaligned copy) goes byte by byte.
//
// Every branch/jump below is followed by its delay-slot instruction. For the
// branch-likely forms (bnezl/beql) that instruction only executes when the
// branch is taken.
LEAF(bcopy)
|
|
// len == 0: return immediately.
beqz $a2, ret
|
|
// (delay slot, always executed) remember the original dst for the return value
move $a3, $a1
|
|
// src == dst: nothing to do.
beq $a0, $a1, ret
|
|
// (delay slot) $at = (dst < src)
// NOTE(review): slt is a signed comparison applied to addresses - confirm
// callers never mix addresses on both sides of the sign boundary.
slt $at, $a1, $a0
|
|
// dst below src: a forward copy is safe.
bnezl $at, goforwards
|
|
// (delay slot, only if taken) $at = (len < 0x10)
slti $at, $a2, 0x10
|
|
// $v0 = src + len (one past the end of the source)
// NOTE(review): add (unlike the addu used in forwards_bytecopy) raises an
// overflow exception on signed overflow - presumably never hit with the
// addresses used in practice; verify.
add $v0, $a0, $a2
|
|
// $at = (dst < src + len), i.e. dst starts inside the source range
slt $at, $a1, $v0
|
|
// dst at or beyond the end of the source: no overlap hazard, copy forwards.
beql $at, $zero, goforwards
|
|
// (delay slot, only if taken) $at = (len < 0x10)
slti $at, $a2, 0x10
|
|
// src < dst < src + len: must copy backwards.
b gobackwards
|
|
// (delay slot) $at = (len < 0x10)
slti $at, $a2, 0x10
|
|
// Not reached by any path in this function: the unconditional branch above
// skips it and goforwards is only entered through its label.
slti $at, $a2, 0x10
|
|
// Forward copy. On entry $at = (len < 0x10).
goforwards:
|
|
// Short copies are not worth aligning: go byte by byte.
bnez $at, forwards_bytecopy
|
|
nop
|
|
// $v0 / $v1 = low two bits of src / dst
andi $v0, $a0, 3
|
|
andi $v1, $a1, 3
|
|
// Same misalignment on both sides: word copies are possible after a short
// alignment step. Otherwise fall through to the byte loop.
beq $v0, $v1, forwalignable
|
|
nop
|
|
// Copy the remaining $a2 bytes one at a time, ascending.
forwards_bytecopy:
|
|
beqz $a2, ret
|
|
nop
|
|
// $v1 = source address at which the loop stops
addu $v1, $a0, $a2
|
|
99:
|
|
lb $v0, ($a0)
|
|
addiu $a0, $a0, 1
|
|
addiu $a1, $a1, 1
|
|
bne $a0, $v1, 99b
|
|
// (delay slot) dst was already advanced, hence the -1 offset
sb $v0, -1($a1)
|
|
// Common exit: return the original dst saved in $a3.
ret:
|
|
jr $ra
|
|
move $v0, $a3
|
|
|
|
// src and dst share the same low two bits ($v0). Copy 0-3 leading bytes so
// that both pointers become 4-byte aligned, then continue with word copies.
forwalignable:
|
|
// Already aligned.
beqz $v0, forwards_32
|
|
li $at, 1
|
|
// Low bits == 1: three bytes to the next word boundary.
beq $v0, $at, forw_copy3
|
|
li $at, 2
|
|
// Low bits == 2: two bytes (one halfword) to the next word boundary.
beql $v0, $at, forw_copy2
|
|
// (delay slot, only if taken) load the halfword to copy
lh $v0, ($a0)
|
|
// Low bits == 3: one byte to the next word boundary.
lb $v0, ($a0)
|
|
addiu $a0, $a0, 1
|
|
addiu $a1, $a1, 1
|
|
addiu $a2, $a2, -1
|
|
b forwards_32
|
|
sb $v0, -1($a1)
|
|
// Not reached: forw_copy2 is only entered through the beql above, whose
// delay slot already performed this load.
lh $v0, ($a0)
|
|
// Store the halfword loaded in the beql delay slot and advance by 2.
forw_copy2:
|
|
addiu $a0, $a0, 2
|
|
addiu $a1, $a1, 2
|
|
addiu $a2, $a2, -2
|
|
b forwards_32
|
|
sh $v0, -2($a1)
|
|
// Copy one byte plus one halfword (3 bytes) and advance by 3.
forw_copy3:
|
|
lb $v0, ($a0)
|
|
lh $v1, 1($a0)
|
|
addiu $a0, $a0, 3
|
|
addiu $a1, $a1, 3
|
|
addiu $a2, $a2, -3
|
|
sb $v0, -3($a1)
|
|
sh $v1, -2($a1)
|
|
|
|
// Falls through from forw_copy3; no branch in this function targets
// `forwards` itself.
forwards:
|
|
// While at least 0x20 bytes remain, move 32 bytes per iteration as 8 words.
forwards_32:
|
|
slti $at, $a2, 0x20
|
|
bnezl $at, forwards_16_
|
|
// (delay slot, only if taken) $at = (len < 0x10) for forwards_16_
slti $at, $a2, 0x10
|
|
lw $v0, ($a0)
|
|
lw $v1, 4($a0)
|
|
lw $t0, 8($a0)
|
|
lw $t1, 0xC($a0)
|
|
lw $t2, 0x10($a0)
|
|
lw $t3, 0x14($a0)
|
|
lw $t4, 0x18($a0)
|
|
lw $t5, 0x1C($a0)
|
|
addiu $a0, $a0, 0x20
|
|
addiu $a1, $a1, 0x20
|
|
addiu $a2, $a2, -0x20
|
|
// dst was already advanced, so the stores use negative offsets
sw $v0, -0x20($a1)
|
|
sw $v1, -0x1C($a1)
|
|
sw $t0, -0x18($a1)
|
|
sw $t1, -0x14($a1)
|
|
sw $t2, -0x10($a1)
|
|
sw $t3, -0xC($a1)
|
|
sw $t4, -8($a1)
|
|
b forwards_32
|
|
sw $t5, -4($a1)
|
|
// While at least 0x10 bytes remain, move 16 bytes per iteration as 4 words.
forwards_16:
|
|
slti $at, $a2, 0x10
|
|
forwards_16_: // fake label due to branch likely optimization
|
|
bnezl $at, forwards_4_
|
|
// (delay slot, only if taken) $at = (len < 4) for forwards_4_
slti $at, $a2, 4
|
|
lw $v0, ($a0)
|
|
lw $v1, 4($a0)
|
|
lw $t0, 8($a0)
|
|
lw $t1, 0xC($a0)
|
|
addiu $a0, $a0, 0x10
|
|
addiu $a1, $a1, 0x10
|
|
addiu $a2, $a2, -0x10
|
|
sw $v0, -0x10($a1)
|
|
sw $v1, -0xC($a1)
|
|
sw $t0, -8($a1)
|
|
b forwards_16
|
|
sw $t1, -4($a1)
|
|
// While at least 4 bytes remain, move one word per iteration.
forwards_4:
|
|
slti $at, $a2, 4
|
|
forwards_4_: // fake label due to branch likely optimization
|
|
// Fewer than 4 bytes left: finish (or return) in the byte loop.
bnez $at, forwards_bytecopy
|
|
nop
|
|
lw $v0, ($a0)
|
|
addiu $a0, $a0, 4
|
|
addiu $a1, $a1, 4
|
|
addiu $a2, $a2, -4
|
|
b forwards_4
|
|
sw $v0, -4($a1)
|
|
// Not reached: follows an unconditional branch and precedes the gobackwards
// label, which is entered with $at already set by the caller's delay slot.
slti $at, $a2, 0x10
|
|
// Backward copy. On entry $at = (len < 0x10). Both pointers are moved to one
// past the end of their buffers and the copy proceeds towards lower addresses.
gobackwards:
|
|
// NOTE(review): add traps on signed overflow, see the note near the top.
add $a0, $a0, $a2
|
|
bnez $at, backwards_bytecopy
|
|
// (delay slot, always executed) dst end pointer
add $a1, $a1, $a2
|
|
// $v0 / $v1 = low two bits of the src / dst end pointers
andi $v0, $a0, 3
|
|
andi $v1, $a1, 3
|
|
beq $v0, $v1, backalignable
|
|
nop
|
|
// Copy the remaining $a2 bytes one at a time, descending. $a0/$a1 point one
// past the last byte still to be copied.
backwards_bytecopy:
|
|
beqz $a2, ret
|
|
nop
|
|
addiu $a0, $a0, -1
|
|
addiu $a1, $a1, -1
|
|
// $v1 = source address at which the loop stops
subu $v1, $a0, $a2
|
|
99:
|
|
lb $v0, ($a0)
|
|
addiu $a0, $a0, -1
|
|
addiu $a1, $a1, -1
|
|
bne $a0, $v1, 99b
|
|
// (delay slot) dst was already decremented, hence the +1 offset
sb $v0, 1($a1)
|
|
// Return the original dst saved in $a3.
jr $ra
|
|
move $v0, $a3
|
|
|
|
// Both end pointers share the same low two bits ($v0). Copy 0-3 trailing
// bytes so that both become 4-byte aligned, then continue with word copies.
backalignable:
|
|
// Already aligned.
beqz $v0, backwards_32
|
|
li $at, 3
|
|
// Low bits == 3: three bytes down to the previous word boundary.
beq $v0, $at, back_copy3
|
|
li $at, 2
|
|
// Low bits == 2: two bytes (one halfword) down to the previous word boundary.
beql $v0, $at, back_copy2
|
|
// (delay slot, only if taken) load the halfword to copy
lh $v0, -2($a0)
|
|
// Low bits == 1: one byte down to the previous word boundary.
lb $v0, -1($a0)
|
|
addiu $a0, $a0, -1
|
|
addiu $a1, $a1, -1
|
|
addiu $a2, $a2, -1
|
|
b backwards_32
|
|
sb $v0, ($a1)
|
|
// Not reached: back_copy2 is only entered through the beql above, whose
// delay slot already performed this load.
lh $v0, -2($a0)
|
|
// Store the halfword loaded in the beql delay slot and step back by 2.
back_copy2:
|
|
addiu $a0, $a0, -2
|
|
addiu $a1, $a1, -2
|
|
addiu $a2, $a2, -2
|
|
b backwards_32
|
|
sh $v0, ($a1)
|
|
// Copy one byte plus one halfword (3 bytes) and step back by 3.
back_copy3:
|
|
lb $v0, -1($a0)
|
|
lh $v1, -3($a0)
|
|
addiu $a0, $a0, -3
|
|
addiu $a1, $a1, -3
|
|
addiu $a2, $a2, -3
|
|
sb $v0, 2($a1)
|
|
sh $v1, ($a1)
|
|
|
|
// Falls through from back_copy3; no branch in this function targets
// `backwards` itself.
backwards:
|
|
// While at least 0x20 bytes remain, move 32 bytes per iteration as 8 words.
backwards_32:
|
|
slti $at, $a2, 0x20
|
|
bnezl $at, backwards_16_
|
|
// (delay slot, only if taken) $at = (len < 0x10) for backwards_16_
slti $at, $a2, 0x10
|
|
lw $v0, -4($a0)
|
|
lw $v1, -8($a0)
|
|
// NOTE(review): lowercase hex digits here and in -0x1c below, unlike the
// uppercase used everywhere else in this function.
lw $t0, -0xc($a0)
|
|
lw $t1, -0x10($a0)
|
|
lw $t2, -0x14($a0)
|
|
lw $t3, -0x18($a0)
|
|
lw $t4, -0x1c($a0)
|
|
lw $t5, -0x20($a0)
|
|
addiu $a0, $a0, -0x20
|
|
addiu $a1, $a1, -0x20
|
|
addiu $a2, $a2, -0x20
|
|
// dst was already moved back, so the stores use non-negative offsets
sw $v0, 0x1C($a1)
|
|
sw $v1, 0x18($a1)
|
|
sw $t0, 0x14($a1)
|
|
sw $t1, 0x10($a1)
|
|
sw $t2, 0xC($a1)
|
|
sw $t3, 8($a1)
|
|
sw $t4, 4($a1)
|
|
b backwards_32
|
|
sw $t5, ($a1)
|
|
// While at least 0x10 bytes remain, move 16 bytes per iteration as 4 words.
backwards_16:
|
|
slti $at, $a2, 0x10
|
|
backwards_16_: // fake label due to branch likely optimization
|
|
bnezl $at, backwards_4_
|
|
// (delay slot, only if taken) $at = (len < 4) for backwards_4_
slti $at, $a2, 4
|
|
lw $v0, -4($a0)
|
|
lw $v1, -8($a0)
|
|
lw $t0, -0xC($a0)
|
|
lw $t1, -0x10($a0)
|
|
addiu $a0, $a0, -0x10
|
|
addiu $a1, $a1, -0x10
|
|
addiu $a2, $a2, -0x10
|
|
sw $v0, 0xC($a1)
|
|
sw $v1, 8($a1)
|
|
sw $t0, 4($a1)
|
|
b backwards_16
|
|
sw $t1, ($a1)
|
|
// While at least 4 bytes remain, move one word per iteration.
backwards_4:
|
|
slti $at, $a2, 4
|
|
backwards_4_: // fake label due to branch likely optimization
|
|
// Fewer than 4 bytes left: finish (or return) in the byte loop.
bnez $at, backwards_bytecopy
|
|
nop
|
|
lw $v0, -4($a0)
|
|
addiu $a0, $a0, -4
|
|
addiu $a1, $a1, -4
|
|
addiu $a2, $a2, -4
|
|
b backwards_4
|
|
sw $v0, ($a1)
|
|
END(bcopy)
|