Mirror of https://github.com/zeldaret/oot.git

Use IDO for assembling handwritten asm files in src (#2390)

* as0

* Fix ASOPTFLAGS for src/libc, remove unnecessary noreorder region in kanread

* Suggested changes

* Use %half to load the boot bss size for matching

Co-authored-by: cadmic <cadmic24@gmail.com>

* Wrap all of __osProbeTLB in noreorder

---------

Co-authored-by: cadmic <cadmic24@gmail.com>
Authored by Tharo on 2024-12-28 20:18:45 +00:00; committed via GitHub
parent ba6a83533a
commit 7e082f0c4f
54 changed files with 2612 additions and 2653 deletions


@@ -1,93 +1,89 @@
#include "ultra64/asm.h"
#include "ultra64/regdef.h"
.set noat
.set noreorder
.section .text
.balign 16
.text
LEAF(bcmp)
slti $at, $a2, 0x10
bnez $at, bytecmp
xor $v0, $a0, $a1
andi $v0, $v0, 3
bnez $v0, unaligncmp
negu $t8, $a0
andi $t8, $t8, 3
beqz $t8, wordcmp
subu $a2, $a2, $t8
move $v0, $v1
lwl $v0, ($a0)
lwl $v1, ($a1)
addu $a0, $a0, $t8
addu $a1, $a1, $t8
bne $v0, $v1, cmpne
xor v0, a0, a1
blt a2, 0x10, bytecmp
and v0, v0, 3
negu t8, a0
bnez v0, unaligncmp
and t8, t8, 3
subu a2, a2, t8
beqz t8, wordcmp
move v0, v1
lwl v0, (a0)
lwl v1, (a1)
addu a0, a0, t8
addu a1, a1, t8
bne v0, v1, cmpne
wordcmp:
li $at, ~3
and $a3, $a2, $at
beqz $a3, bytecmp
subu $a2, $a2, $a3
addu $a3, $a3, $a0
lw $v0, ($a0)
and a3, a2, ~3
subu a2, a2, a3
beqz a3, bytecmp
addu a3, a3, a0
1:
lw $v1, ($a1)
addiu $a0, $a0, 4
addiu $a1, $a1, 4
bne $v0, $v1, cmpne
nop
bnel $a0, $a3, 1b
lw $v0, ($a0)
lw v0, (a0)
lw v1, (a1)
addu a0, a0, 4
addu a1, a1, 4
bne v0, v1, cmpne
bne a0, a3, 1b
b bytecmp
nop
unaligncmp:
negu $a3, $a1
andi $a3, $a3, 3
beqz $a3, partaligncmp
subu $a2, $a2, $a3
addu $a3, $a3, $a0
lbu $v0, ($a0)
negu a3, a1
and a3, a3, 3
subu a2, a2, a3
beqz a3, partaligncmp
addu a3, a3, a0
1:
lbu $v1, ($a1)
addiu $a0, $a0, 1
addiu $a1, $a1, 1
bne $v0, $v1, cmpne
nop
bnel $a0, $a3, 1b
lbu $v0, ($a0)
lbu v0, (a0)
lbu v1, (a1)
addu a0, a0, 1
addu a1, a1, 1
bne v0, v1, cmpne
bne a0, a3, 1b
partaligncmp:
li $at, ~3
and $a3, $a2, $at
beqz $a3, bytecmp
subu $a2, $a2, $a3
addu $a3, $a3, $a0
lwl $v0, ($a0)
and a3, a2, ~3
subu a2, a2, a3
beqz a3, bytecmp
addu a3, a3, a0
1:
lw $v1, ($a1)
lwr $v0, 3($a0)
addiu $a0, $a0, 4
addiu $a1, $a1, 4
bne $v0, $v1, cmpne
nop
bnel $a0, $a3, 1b
lwl $v0, ($a0)
lwl v0, (a0)
lw v1, (a1)
lwr v0, 3(a0)
addu a0, a0, 4
addu a1, a1, 4
bne v0, v1, cmpne
bne a0, a3, 1b
bytecmp:
blez $a2, cmpdone
addu $a3, $a2, $a0
lbu $v0, ($a0)
addu a3, a2, a0
blez a2, cmpdone
1:
lbu $v1, ($a1)
addiu $a0, $a0, 1
addiu $a1, $a1, 1
bne $v0, $v1, cmpne
nop
bnel $a0, $a3, 1b
lbu $v0, ($a0)
lbu v0, (a0)
lbu v1, (a1)
addu a0, a0, 1
addu a1, a1, 1
bne v0, v1, cmpne
bne a0, a3, 1b
cmpdone:
jr $ra
move $v0, $zero
move v0, zero
jr ra
cmpne:
jr $ra
li $v0, 1
li v0, 1
jr ra
END(bcmp)
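
In the hunk above, lines that write registers with a `$` prefix are the old GAS-scheduled source (explicit delay slots under .set noreorder), while the bare regdef.h register names are the new IDO-style source that lets the assembler schedule delay slots. Either way the routine implements BSD-style bcmp: a0 and a1 hold the two buffers, a2 the byte count, and v0 the result. A minimal C sketch of that behavior follows; the function name is illustrative and the word-at-a-time fast paths are omitted.

#include <stddef.h>

/* Hedged sketch of what the assembly computes: 0 when the buffers match
 * over n bytes, 1 otherwise (cmpne returns 1, not a signed difference). */
int bcmp_sketch(const void *s1, const void *s2, size_t n)
{
    const unsigned char *a = s1;
    const unsigned char *b = s2;

    while (n-- > 0) {
        if (*a++ != *b++) {
            return 1; /* cmpne */
        }
    }
    return 0; /* cmpdone */
}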


@@ -1,233 +1,211 @@
#include "ultra64/asm.h"
#include "ultra64/regdef.h"
.set noat
.set noreorder
.section .text
.balign 16
.text
LEAF(bcopy)
beqz $a2, ret
move $a3, $a1
beq $a0, $a1, ret
slt $at, $a1, $a0
bnezl $at, goforwards
slti $at, $a2, 0x10
add $v0, $a0, $a2
slt $at, $a1, $v0
beql $at, $zero, goforwards
slti $at, $a2, 0x10
move a3, a1
beqz a2, ret
beq a0, a1, ret
blt a1, a0, goforwards
add v0, a0, a2
bge a1, v0, goforwards
b gobackwards
slti $at, $a2, 0x10
slti $at, $a2, 0x10
goforwards:
bnez $at, forwards_bytecopy
nop
andi $v0, $a0, 3
andi $v1, $a1, 3
beq $v0, $v1, forwalignable
nop
blt a2, 0x10, forwards_bytecopy
and v0, a0, 3
and v1, a1, 3
beq v0, v1, forwalignable
forwards_bytecopy:
beqz $a2, ret
nop
addu $v1, $a0, $a2
beqz a2, ret
addu v1, a0, a2
99:
lb $v0, ($a0)
addiu $a0, $a0, 1
addiu $a1, $a1, 1
bne $a0, $v1, 99b
sb $v0, -1($a1)
lb v0, (a0)
addu a0, a0, 1
sb v0, (a1)
addu a1, a1, 1
bne a0, v1, 99b
ret:
jr $ra
move $v0, $a3
move v0, a3
jr ra
forwalignable:
beqz $v0, forwards_32
li $at, 1
beq $v0, $at, forw_copy3
li $at, 2
beql $v0, $at, forw_copy2
lh $v0, ($a0)
lb $v0, ($a0)
addiu $a0, $a0, 1
addiu $a1, $a1, 1
addiu $a2, $a2, -1
beqz v0, forwards_32
beq v0, 1, forw_copy3
beq v0, 2, forw_copy2
lb v0, (a0)
addu a0, a0, 1
sb v0, (a1)
addu a1, a1, 1
addu a2, a2, -1
b forwards_32
sb $v0, -1($a1)
lh $v0, ($a0)
forw_copy2:
addiu $a0, $a0, 2
addiu $a1, $a1, 2
addiu $a2, $a2, -2
lh v0, (a0)
addu a0, a0, 2
sh v0, (a1)
addu a1, a1, 2
addu a2, a2, -2
b forwards_32
sh $v0, -2($a1)
forw_copy3:
lb $v0, ($a0)
lh $v1, 1($a0)
addiu $a0, $a0, 3
addiu $a1, $a1, 3
addiu $a2, $a2, -3
sb $v0, -3($a1)
sh $v1, -2($a1)
lb v0, (a0)
lh v1, 1(a0)
addiu a0, a0, 3
sb v0, (a1)
sh v1, 1(a1)
addiu a1, a1, 3
addiu a2, a2, -3
forwards:
forwards_32:
slti $at, $a2, 0x20
bnezl $at, forwards_16_
slti $at, $a2, 0x10
lw $v0, ($a0)
lw $v1, 4($a0)
lw $t0, 8($a0)
lw $t1, 0xC($a0)
lw $t2, 0x10($a0)
lw $t3, 0x14($a0)
lw $t4, 0x18($a0)
lw $t5, 0x1C($a0)
addiu $a0, $a0, 0x20
addiu $a1, $a1, 0x20
addiu $a2, $a2, -0x20
sw $v0, -0x20($a1)
sw $v1, -0x1C($a1)
sw $t0, -0x18($a1)
sw $t1, -0x14($a1)
sw $t2, -0x10($a1)
sw $t3, -0xC($a1)
sw $t4, -8($a1)
blt a2, 32, forwards_16
lw v0, 0(a0)
lw v1, 4(a0)
lw t0, 8(a0)
lw t1, 12(a0)
lw t2, 16(a0)
lw t3, 20(a0)
lw t4, 24(a0)
lw t5, 28(a0)
addiu a0, a0, 32
sw v0, 0(a1)
sw v1, 4(a1)
sw t0, 8(a1)
sw t1, 12(a1)
sw t2, 16(a1)
sw t3, 20(a1)
sw t4, 24(a1)
sw t5, 28(a1)
addiu a1, a1, 32
addiu a2, a2, -32
b forwards_32
sw $t5, -4($a1)
forwards_16:
slti $at, $a2, 0x10
forwards_16_: // fake label due to branch likely optimization
bnezl $at, forwards_4_
slti $at, $a2, 4
lw $v0, ($a0)
lw $v1, 4($a0)
lw $t0, 8($a0)
lw $t1, 0xC($a0)
addiu $a0, $a0, 0x10
addiu $a1, $a1, 0x10
addiu $a2, $a2, -0x10
sw $v0, -0x10($a1)
sw $v1, -0xC($a1)
sw $t0, -8($a1)
blt a2, 16, forwards_4
lw v0, 0(a0)
lw v1, 4(a0)
lw t0, 8(a0)
lw t1, 12(a0)
addiu a0, a0, 16
sw v0, 0(a1)
sw v1, 4(a1)
sw t0, 8(a1)
sw t1, 12(a1)
addiu a1, a1, 16
addiu a2, a2, -16
b forwards_16
sw $t1, -4($a1)
forwards_4:
slti $at, $a2, 4
forwards_4_: // fake label due to branch likely optimization
bnez $at, forwards_bytecopy
nop
lw $v0, ($a0)
addiu $a0, $a0, 4
addiu $a1, $a1, 4
addiu $a2, $a2, -4
blt a2, 4, forwards_bytecopy
lw v0, 0(a0)
addiu a0, a0, 4
sw v0, 0(a1)
addiu a1, a1, 4
addiu a2, a2, -4
b forwards_4
sw $v0, -4($a1)
slti $at, $a2, 0x10
gobackwards:
add $a0, $a0, $a2
bnez $at, backwards_bytecopy
add $a1, $a1, $a2
andi $v0, $a0, 3
andi $v1, $a1, 3
beq $v0, $v1, backalignable
nop
add a0, a0, a2
add a1, a1, a2
blt a2, 16, backwards_bytecopy
andi v0, a0, 0x3
andi v1, a1, 0x3
beq v0, v1, backalignable
backwards_bytecopy:
beqz $a2, ret
nop
addiu $a0, $a0, -1
addiu $a1, $a1, -1
subu $v1, $a0, $a2
beqz a2, ret
addiu a0, a0, -1
addiu a1, a1, -1
subu v1, a0, a2
99:
lb $v0, ($a0)
addiu $a0, $a0, -1
addiu $a1, $a1, -1
bne $a0, $v1, 99b
sb $v0, 1($a1)
jr $ra
move $v0, $a3
lb v0, 0(a0)
addiu a0, a0, -1
sb v0, 0(a1)
addiu a1, a1, -1
bne a0, v1, 99b
move v0, a3
jr ra
backalignable:
beqz $v0, backwards_32
li $at, 3
beq $v0, $at, back_copy3
li $at, 2
beql $v0, $at, back_copy2
lh $v0, -2($a0)
lb $v0, -1($a0)
addiu $a0, $a0, -1
addiu $a1, $a1, -1
addiu $a2, $a2, -1
b backwards_32
sb $v0, ($a1)
lh $v0, -2($a0)
beqz v0, backwards
beq v0, 3, back_copy3
beq v0, 2, back_copy2
lb v0, -1(a0)
addiu a0, a0, -1
sb v0, -1(a1)
addiu a1, a1, -1
addiu a2, a2, -1
b backwards
back_copy2:
addiu $a0, $a0, -2
addiu $a1, $a1, -2
addiu $a2, $a2, -2
b backwards_32
sh $v0, ($a1)
lh v0, -2(a0)
addiu a0, a0, -2
sh v0, -2(a1)
addiu a1, a1, -2
addiu a2, a2, -2
b backwards
back_copy3:
lb $v0, -1($a0)
lh $v1, -3($a0)
addiu $a0, $a0, -3
addiu $a1, $a1, -3
addiu $a2, $a2, -3
sb $v0, 2($a1)
sh $v1, ($a1)
lb v0, -1(a0)
lh v1, -3(a0)
addiu a0, a0, -3
sb v0, -1(a1)
sh v1, -3(a1)
addiu a1, a1, -3
addiu a2, a2, -3
backwards:
backwards_32:
slti $at, $a2, 0x20
bnezl $at, backwards_16_
slti $at, $a2, 0x10
lw $v0, -4($a0)
lw $v1, -8($a0)
lw $t0, -0xc($a0)
lw $t1, -0x10($a0)
lw $t2, -0x14($a0)
lw $t3, -0x18($a0)
lw $t4, -0x1c($a0)
lw $t5, -0x20($a0)
addiu $a0, $a0, -0x20
addiu $a1, $a1, -0x20
addiu $a2, $a2, -0x20
sw $v0, 0x1C($a1)
sw $v1, 0x18($a1)
sw $t0, 0x14($a1)
sw $t1, 0x10($a1)
sw $t2, 0xC($a1)
sw $t3, 8($a1)
sw $t4, 4($a1)
blt a2, 32, backwards_16
lw v0, -4(a0)
lw v1, -8(a0)
lw t0, -12(a0)
lw t1, -16(a0)
lw t2, -20(a0)
lw t3, -24(a0)
lw t4, -28(a0)
lw t5, -32(a0)
addiu a0, a0, -32
sw v0, -4(a1)
sw v1, -8(a1)
sw t0, -12(a1)
sw t1, -16(a1)
sw t2, -20(a1)
sw t3, -24(a1)
sw t4, -28(a1)
sw t5, -32(a1)
addiu a1, a1, -32
addiu a2, a2, -32
b backwards_32
sw $t5, ($a1)
backwards_16:
slti $at, $a2, 0x10
backwards_16_: // fake label due to branch likely optimization
bnezl $at, backwards_4_
slti $at, $a2, 4
lw $v0, -4($a0)
lw $v1, -8($a0)
lw $t0, -0xC($a0)
lw $t1, -0x10($a0)
addiu $a0, $a0, -0x10
addiu $a1, $a1, -0x10
addiu $a2, $a2, -0x10
sw $v0, 0xC($a1)
sw $v1, 8($a1)
sw $t0, 4($a1)
blt a2, 16, backwards_4
lw v0, -4(a0)
lw v1, -8(a0)
lw t0, -12(a0)
lw t1, -16(a0)
addiu a0, a0, -16
sw v0, -4(a1)
sw v1, -8(a1)
sw t0, -12(a1)
sw t1, -16(a1)
addiu a1, a1, -16
addiu a2, a2, -16
b backwards_16
sw $t1, ($a1)
backwards_4:
slti $at, $a2, 4
backwards_4_: // fake label due to branch likely optimization
bnez $at, backwards_bytecopy
nop
lw $v0, -4($a0)
addiu $a0, $a0, -4
addiu $a1, $a1, -4
addiu $a2, $a2, -4
blt a2, 4, backwards_bytecopy
lw v0, -4(a0)
addiu a0, a0, -4
sw v0, -4(a1)
addiu a1, a1, -4
addiu a2, a2, -4
b backwards_4
sw $v0, ($a1)
END(bcopy)
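
bcopy takes its arguments in BSD order (source in a0, destination in a1, length in a2) and picks a forwards or backwards copy so overlapping regions behave like memmove; the original destination is left in v0. A hedged C sketch of that control flow, with an illustrative name and without the 32/16/4-byte unrolled loops shown in the assembly:

#include <stddef.h>

/* Sketch only: mirrors the goforwards/gobackwards decision in the routine
 * above and returns the destination, as the assembly does via a3 -> v0. */
void *bcopy_sketch(const void *src, void *dst, size_t len)
{
    const unsigned char *s = src;
    unsigned char *d = dst;

    if (len == 0 || (const unsigned char *)d == s) {
        return dst; /* ret: nothing to copy */
    }
    if (d < s || d >= s + len) {
        /* destination does not overlap the tail of the source: goforwards */
        while (len-- > 0) {
            *d++ = *s++;
        }
    } else {
        /* destination overlaps the source: gobackwards, copy from the end */
        s += len;
        d += len;
        while (len-- > 0) {
            *--d = *--s;
        }
    }
    return dst;
}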


@@ -1,65 +1,59 @@
#include "ultra64/asm.h"
#include "ultra64/regdef.h"
.set noat
.set noreorder
.section .text
.balign 16
.text
LEAF(bzero)
slti $at, $a1, 0xC
bnez $at, bytezero
negu $v1, $a0
andi $v1, $v1, 3
beqz $v1, blkzero
subu $a1, $a1, $v1
swl $zero, ($a0)
addu $a0, $a0, $v1
negu v1, a0
blt a1, 0xC, bytezero
and v1, v1, 3
subu a1, a1, v1
beqz v1, blkzero
swl zero, (a0)
addu a0, a0, v1
blkzero:
// align backwards to 0x20
li $at, ~0x1F
and $a3, $a1, $at
// If the result is zero, the amount to zero is less than 0x20 bytes
beqz $a3, wordzero
subu $a1, $a1, $a3
// zero in blocks of 0x20 at a time
addu $a3, $a3, $a0
/* align backwards to 0x20 */
and a3, a1, ~(0x20 - 1)
/* If the result is zero, the amount to zero is less than 0x20 bytes */
subu a1, a1, a3
beqz a3, wordzero
/* zero in blocks of 0x20 at a time */
addu a3, a3, a0
1:
addiu $a0, $a0, 0x20
sw $zero, -0x20($a0)
sw $zero, -0x1C($a0)
sw $zero, -0x18($a0)
sw $zero, -0x14($a0)
sw $zero, -0x10($a0)
sw $zero, -0xC($a0)
sw $zero, -8($a0)
bne $a0, $a3, 1b
sw $zero, -4($a0)
sw zero, 0(a0)
sw zero, 4(a0)
sw zero, 8(a0)
sw zero, 12(a0)
addiu a0, a0, 0x20
sw zero, -16(a0)
sw zero, -12(a0)
sw zero, -8(a0)
sw zero, -4(a0)
bne a0, a3, 1b
wordzero:
// align backwards to 0x4
li $at, ~3
and $a3, $a1, $at
// If the result is zero, the amount to zero is less than 0x4 bytes
beqz $a3, bytezero
subu $a1, $a1, $a3
// zero one word at a time
addu $a3, $a3, $a0
/* align backwards to 0x4 */
and a3, a1, ~3
/* If the result is zero, the amount to zero is less than 0x4 bytes */
subu a1, a1, a3
beqz a3, bytezero
/* zero one word at a time */
addu a3, a3, a0
1:
addiu $a0, $a0, 4
bne $a0, $a3, 1b
sw $zero, -4($a0)
addu a0, a0, 4
sw zero, -4(a0)
bne a0, a3, 1b
bytezero:
// test if nothing left to zero
blez $a1, zerodone
nop
// zero one byte at a time
addu $a1, $a1, $a0
/* test if nothing left to zero */
blez a1, zerodone
/* zero one byte at a time */
addu a1, a1, a0
1:
addiu $a0, $a0, 1
bne $a0, $a1, 1b
sb $zero, -1($a0)
sb zero, (a0)
addiu a0, a0, 1
bne a0, a1, 1b
zerodone:
jr $ra
nop
jr ra
END(bzero)
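
bzero takes the buffer in a0 and the byte count in a1. The routine first aligns the pointer with swl, then clears 0x20-byte blocks, then whole words, and finally trailing bytes. A byte-at-a-time C sketch of the end result, with an illustrative name and none of the unrolled fast paths:

#include <stddef.h>

/* Sketch only: clears len bytes starting at dst, which is all the assembly
 * above ultimately does, just in larger aligned chunks where possible. */
void bzero_sketch(void *dst, size_t len)
{
    unsigned char *p = dst;

    while (len-- > 0) {
        *p++ = 0;
    }
}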