oot/asm/bcopy.s (mirror of https://github.com/zeldaret/oot.git)
Commit 7334ffa373 by Tharo, 2022-05-01:
Improve the state of handwritten assembly files (#865)
* Format all handwritten asm and document some

* Use c preprocessor for constants

* Fix

* Fix PI_STATUS_ERROR, some label improvements

* Avoid hi/lo for constants

* Some more comments

* Properly mark functions as functions and their sizes

* Fix merge

* Improvements

* Review suggestions, rework procedure start/end macros to be more like libreultra

* Move IPL3 symbol definitions into ipl3.s

* Fix undefined_syms, add include and language guards to asm.h and fix the comment in gbi.h

* Consistent hex capitalization, add some MIPS builtin defines to CC_CHECK so it behaves properly

* Add -no-pad-sections assembler option and clean up alignment in gu files and bzero

* Further suggestions and improvements

* Matrix conversion function clarifications

* Fix passing AVOID_UB to gcc

* Suggestions

* Suggestions, global interrupt mask improvements

* Further suggestions, interrupt mask comments

* Comments fixes, rdb.h

* Switch from # comments to // comments, remove unnecessary .set gp=64 directives

* Further review suggestions

* Missed one

#include "ultra64/asm.h"
.set noat
.set noreorder
.section .text
.balign 16
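// bcopy(src ($a0), dst ($a1), len ($a2))
// Copies len bytes from src to dst, handling overlapping buffers like
// memmove. The original dst is saved in $a3 and returned in $v0.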
LEAF(bcopy)
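// Return immediately if len is 0 or src == dst. Copy forwards if dst is
// below src or the buffers do not overlap, otherwise copy backwards.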
beqz $a2, ret
move $a3, $a1
beq $a0, $a1, ret
slt $at, $a1, $a0
bnezl $at, goforwards
slti $at, $a2, 0x10
add $v0, $a0, $a2
slt $at, $a1, $v0
beql $at, $zero, goforwards
slti $at, $a2, 0x10
b gobackwards
slti $at, $a2, 0x10
slti $at, $a2, 0x10
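// Forward copy. $at = (len < 16): small copies go straight to the byte
// loop, as do copies whose src and dst alignments differ.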
goforwards:
bnez $at, forwards_bytecopy
nop
andi $v0, $a0, 3
andi $v1, $a1, 3
beq $v0, $v1, forwalignable
nop
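// Copy forwards one byte at a time; also the tail of the word loops below.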
forwards_bytecopy:
beqz $a2, ret
nop
addu $v1, $a0, $a2
99:
lb $v0, ($a0)
addiu $a0, $a0, 1
addiu $a1, $a1, 1
bne $a0, $v1, 99b
sb $v0, -1($a1)
ret:
jr $ra
move $v0, $a3
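// src and dst can be word-aligned together: copy the 0-3 leading bytes
// needed to word-align the pointers, then fall into the unrolled word loops.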
forwalignable:
beqz $v0, forwards_32
li $at, 1
beq $v0, $at, forw_copy3
li $at, 2
beql $v0, $at, forw_copy2
lh $v0, ($a0)
lb $v0, ($a0)
addiu $a0, $a0, 1
addiu $a1, $a1, 1
addiu $a2, $a2, -1
b forwards_32
sb $v0, -1($a1)
lh $v0, ($a0)
forw_copy2:
addiu $a0, $a0, 2
addiu $a1, $a1, 2
addiu $a2, $a2, -2
b forwards_32
sh $v0, -2($a1)
forw_copy3:
lb $v0, ($a0)
lh $v1, 1($a0)
addiu $a0, $a0, 3
addiu $a1, $a1, 3
addiu $a2, $a2, -3
sb $v0, -3($a1)
sh $v1, -2($a1)
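// Main loop: 32 bytes per iteration. All eight words are loaded before
// any are stored, so the loop is safe even if the buffers overlap by
// fewer than 32 bytes.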
forwards:
forwards_32:
slti $at, $a2, 0x20
bnezl $at, forwards_16_
slti $at, $a2, 0x10
lw $v0, ($a0)
lw $v1, 4($a0)
lw $t0, 8($a0)
lw $t1, 0xC($a0)
lw $t2, 0x10($a0)
lw $t3, 0x14($a0)
lw $t4, 0x18($a0)
lw $t5, 0x1C($a0)
addiu $a0, $a0, 0x20
addiu $a1, $a1, 0x20
addiu $a2, $a2, -0x20
sw $v0, -0x20($a1)
sw $v1, -0x1C($a1)
sw $t0, -0x18($a1)
sw $t1, -0x14($a1)
sw $t2, -0x10($a1)
sw $t3, -0xC($a1)
sw $t4, -8($a1)
b forwards_32
sw $t5, -4($a1)
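// 16 bytes per iteration once fewer than 32 remain.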
forwards_16:
slti $at, $a2, 0x10
forwards_16_: // fake label due to branch likely optimization
bnezl $at, forwards_4_
slti $at, $a2, 4
lw $v0, ($a0)
lw $v1, 4($a0)
lw $t0, 8($a0)
lw $t1, 0xC($a0)
addiu $a0, $a0, 0x10
addiu $a1, $a1, 0x10
addiu $a2, $a2, -0x10
sw $v0, -0x10($a1)
sw $v1, -0xC($a1)
sw $t0, -8($a1)
b forwards_16
sw $t1, -4($a1)
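// 4 bytes per iteration; the last 0-3 bytes go through forwards_bytecopy.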
forwards_4:
slti $at, $a2, 4
forwards_4_: // fake label due to branch likely optimization
bnez $at, forwards_bytecopy
nop
lw $v0, ($a0)
addiu $a0, $a0, 4
addiu $a1, $a1, 4
addiu $a2, $a2, -4
b forwards_4
sw $v0, -4($a1)
slti $at, $a2, 0x10
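// Backward copy, for when dst overlaps src from above. Both pointers are
// moved to the ends of the buffers; $at = (len < 16), and small or
// misaligned copies use the byte loop.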
gobackwards:
add $a0, $a0, $a2
bnez $at, backwards_bytecopy
add $a1, $a1, $a2
andi $v0, $a0, 3
andi $v1, $a1, 3
beq $v0, $v1, backalignable
nop
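// Copy backwards one byte at a time; also the tail of the word loops below.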
backwards_bytecopy:
beqz $a2, ret
nop
addiu $a0, $a0, -1
addiu $a1, $a1, -1
subu $v1, $a0, $a2
99:
lb $v0, ($a0)
addiu $a0, $a0, -1
addiu $a1, $a1, -1
bne $a0, $v1, 99b
sb $v0, 1($a1)
jr $ra
move $v0, $a3
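// Mirror of forwalignable: copy the 0-3 trailing bytes needed to word-align
// the end pointers, then fall into the backward word loops.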
backalignable:
beqz $v0, backwards_32
li $at, 3
beq $v0, $at, back_copy3
li $at, 2
beql $v0, $at, back_copy2
lh $v0, -2($a0)
lb $v0, -1($a0)
addiu $a0, $a0, -1
addiu $a1, $a1, -1
addiu $a2, $a2, -1
b backwards_32
sb $v0, ($a1)
lh $v0, -2($a0)
back_copy2:
addiu $a0, $a0, -2
addiu $a1, $a1, -2
addiu $a2, $a2, -2
b backwards_32
sh $v0, ($a1)
back_copy3:
lb $v0, -1($a0)
lh $v1, -3($a0)
addiu $a0, $a0, -3
addiu $a1, $a1, -3
addiu $a2, $a2, -3
sb $v0, 2($a1)
sh $v1, ($a1)
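// Main loop: 32 bytes per iteration, walking down from the end, loading
// all eight words before storing any.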
backwards:
backwards_32:
slti $at, $a2, 0x20
bnezl $at, backwards_16_
slti $at, $a2, 0x10
lw $v0, -4($a0)
lw $v1, -8($a0)
lw $t0, -0xC($a0)
lw $t1, -0x10($a0)
lw $t2, -0x14($a0)
lw $t3, -0x18($a0)
lw $t4, -0x1C($a0)
lw $t5, -0x20($a0)
addiu $a0, $a0, -0x20
addiu $a1, $a1, -0x20
addiu $a2, $a2, -0x20
sw $v0, 0x1C($a1)
sw $v1, 0x18($a1)
sw $t0, 0x14($a1)
sw $t1, 0x10($a1)
sw $t2, 0xC($a1)
sw $t3, 8($a1)
sw $t4, 4($a1)
b backwards_32
sw $t5, ($a1)
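// 16 bytes per iteration once fewer than 32 remain.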
backwards_16:
slti $at, $a2, 0x10
backwards_16_: // fake label due to branch likely optimization
bnezl $at, backwards_4_
slti $at, $a2, 4
lw $v0, -4($a0)
lw $v1, -8($a0)
lw $t0, -0xC($a0)
lw $t1, -0x10($a0)
addiu $a0, $a0, -0x10
addiu $a1, $a1, -0x10
addiu $a2, $a2, -0x10
sw $v0, 0xC($a1)
sw $v1, 8($a1)
sw $t0, 4($a1)
b backwards_16
sw $t1, ($a1)
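// 4 bytes per iteration; the last 0-3 bytes go through backwards_bytecopy.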
backwards_4:
slti $at, $a2, 4
backwards_4_: // fake label due to branch likely optimization
bnez $at, backwards_bytecopy
nop
lw $v0, -4($a0)
addiu $a0, $a0, -4
addiu $a1, $a1, -4
addiu $a2, $a2, -4
b backwards_4
sw $v0, ($a1)
END(bcopy)
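
For reference, a minimal C sketch of the strategy the assembly implements.
The name bcopy_sketch is hypothetical and this code is not part of the
repository; it omits the 32- and 16-byte unrolled loops, keeping only the
direction choice, alignment fixup, word loop, and byte tail:

#include <stddef.h>
#include <stdint.h>

void *bcopy_sketch(const void *src, void *dst, size_t len)
{
    const uint8_t *s = src;
    uint8_t *d = dst;

    if (len == 0 || s == d)
        return dst;

    if (d < s || d >= s + len) {
        /* Forward copy: if both pointers share the same alignment mod 4,
         * copy leading bytes up to a word boundary, then whole words. */
        if (len >= 16 && ((uintptr_t)s & 3) == ((uintptr_t)d & 3)) {
            while ((uintptr_t)s & 3) {
                *d++ = *s++;
                len--;
            }
            for (; len >= 4; len -= 4, s += 4, d += 4)
                *(uint32_t *)d = *(const uint32_t *)s;
        }
        while (len > 0) {            /* byte tail (forwards_bytecopy) */
            *d++ = *s++;
            len--;
        }
    } else {
        /* Backward copy: start past the end so overlapping bytes are
         * read before they are overwritten. */
        s += len;
        d += len;
        if (len >= 16 && ((uintptr_t)s & 3) == ((uintptr_t)d & 3)) {
            while ((uintptr_t)s & 3) {
                *--d = *--s;
                len--;
            }
            for (; len >= 4; len -= 4) {
                s -= 4;
                d -= 4;
                *(uint32_t *)d = *(const uint32_t *)s;
            }
        }
        while (len > 0) {            /* byte tail (backwards_bytecopy) */
            *--d = *--s;
            len--;
        }
    }
    return dst;                      /* the asm returns dst in $v0 */
}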