Experimental code that turned out to be too slow and got scrapped.
This commit is contained in:
parent
cb8654ac7e
commit
24d67c7cd6
4 changed files with 185 additions and 0 deletions
26
docs/assembly_references.txt
Normal file
26
docs/assembly_references.txt
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
EFLAGS bit meaning
|
||||||
|
http://reverseengineering.stackexchange.com/a/9222
|
||||||
|
|
||||||
|
GCC inline assembly
|
||||||
|
http://ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html
|
||||||
|
|
||||||
|
Popping the FPU stack
|
||||||
|
http://stackoverflow.com/a/33575875/768582
|
||||||
|
|
||||||
|
Decoding a sample snippet of asm
|
||||||
|
http://stackoverflow.com/questions/9186150/decoding-and-understanding-assembly-code
|
||||||
|
|
||||||
|
GNU assembler examples
|
||||||
|
http://cs.lmu.edu/~ray/notes/gasexamples/
|
||||||
|
|
||||||
|
GCC function attributes (see "naked")
|
||||||
|
https://gcc.gnu.org/onlinedocs/gcc-4.3.5/gcc/Function-Attributes.html
|
||||||
|
|
||||||
|
Fast floating point sign
|
||||||
|
http://stackoverflow.com/a/2508911/768582
|
||||||
|
|
||||||
|
AMD assembly reference
|
||||||
|
http://developer.amd.com/wordpress/media/2012/10/26569_APM_v51.pdf
|
||||||
|
|
||||||
|
Moving values between SSE and FPU
|
||||||
|
http://stackoverflow.com/questions/37567154/intel-x86-64-assembly-how-to-move-between-x87-and-sse2-calculating-arctangent#37573264
|
48
docs/old_code/asm/x86_64/fsgn.s
Normal file
48
docs/old_code/asm/x86_64/fsgn.s
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
#
|
||||||
|
# Copyright 2016, 2017 Michele "King_DuckZ" Santullo
|
||||||
|
|
||||||
|
# This file is part of MyCurry.
|
||||||
|
|
||||||
|
# MyCurry is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
|
||||||
|
# MyCurry is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with MyCurry. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
.data
|
||||||
|
minus1: .float -1.0
|
||||||
|
|
||||||
|
.global fsgn_asm
|
||||||
|
|
||||||
|
.text
|
||||||
|
# fsgn_asm: sign of a single-precision float, computed on the x87 FPU.
#   in:  %xmm0 = value to test
#   out: %xmm0 = 1.0 if the value is greater than zero, 0.0 if it compares
#        equal to zero, -1.0 otherwise
# Clobbers %ax and the CPU flags (via fstsw/sahf) and reinitialises the x87
# unit before returning (fninit).
# NOTE(review): an unordered compare (NaN input) should also set C3/ZF, which
# would select the 0.0 result through fcmove - confirm if NaN matters.
# NOTE(review): minus1 is addressed absolutely (no %rip-relative form); this
# presumably will not link into position-independent code - confirm.
fsgn_asm:
|
||||||
|
subq $24,%rsp	# reserve scratch stack space for the SSE <-> x87 transfer
|
||||||
|
movss %xmm0,(%rsp)	# spill the argument to memory so the FPU can load it
|
||||||
|
fldz	# st0 = 0.0
|
||||||
|
flds (%rsp)	# st0 = input, st1 = 0.0
|
||||||
|
fcomp	# compare input (st0) with 0.0 (st1), then pop: st0 = 0.0
|
||||||
|
|
||||||
|
fstsw %ax	# copy the FPU status word (condition codes) into %ax
|
||||||
|
sahf	# load %ah into the CPU flags so fcmovcc can test the compare result
|
||||||
|
|
||||||
|
fld1	# st0 = 1.0, st1 = 0.0
|
||||||
|
fld minus1	# st0 = -1.0 (default result), st1 = 1.0, st2 = 0.0
|
||||||
|
|
||||||
|
fcmovnbe %st(1),%st	# input above zero: st0 = 1.0
|
||||||
|
fcmove %st(2),%st	# input equal to zero: st0 = 0.0
|
||||||
|
|
||||||
|
fstps (%rsp)	# store the selected result as a float and pop it
|
||||||
|
fninit	# reinitialise the FPU; this also discards the two values left on its stack
|
||||||
|
movss (%rsp),%xmm0	# move the result back into the SSE return register
|
||||||
|
|
||||||
|
#xorpd %xmm0,%xmm0
|
||||||
|
addq $24,%rsp	# release the scratch stack space
|
||||||
|
ret
|
83
docs/old_code/fsgn_timing.cpp
Normal file
83
docs/old_code/fsgn_timing.cpp
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
/*
|
||||||
|
Copyright 2016, 2017 Michele "King_DuckZ" Santullo
|
||||||
|
|
||||||
|
This file is part of MyCurry.
|
||||||
|
|
||||||
|
MyCurry is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
MyCurry is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with MyCurry. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "fsgn_timing.hpp"
|
||||||
|
#include "fsgn.hpp"
|
||||||
|
#if defined(FSGN_WITH_TIMING)
|
||||||
|
# include <chrono>
|
||||||
|
# include <random>
|
||||||
|
# include <vector>
|
||||||
|
# include <algorithm>
|
||||||
|
# include <iostream>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace curry {
|
||||||
|
#if defined(FSGN_WITH_TIMING)
|
||||||
|
void do_fsgn_timing() {
|
||||||
|
constexpr const auto count = 1000000U;
|
||||||
|
std::minstd_rand rand;
|
||||||
|
std::vector<float> inputs(count);
|
||||||
|
std::generate(inputs.begin(), inputs.end(), [&](){
|
||||||
|
return static_cast<float>(rand()) - static_cast<float>(rand.max()) / 2.0f;
|
||||||
|
});
|
||||||
|
|
||||||
|
//fast_fsgn
|
||||||
|
{
|
||||||
|
float result = 0.0f;
|
||||||
|
|
||||||
|
auto t_start = std::chrono::high_resolution_clock::now();
|
||||||
|
for (auto z = 0U; z < count; ++z) {
|
||||||
|
result += fast_fsgn(inputs[z]);
|
||||||
|
}
|
||||||
|
auto t_end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
std::cout << "fast_fsgn result: " << result << " in " <<
|
||||||
|
std::chrono::duration<double>(t_end - t_start).count() << '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
//fsgn_asm
|
||||||
|
{
|
||||||
|
float result = 0.0f;
|
||||||
|
|
||||||
|
auto t_start = std::chrono::high_resolution_clock::now();
|
||||||
|
for (auto z = 0U; z < count; ++z) {
|
||||||
|
result += fsgn_asm(inputs[z]);
|
||||||
|
}
|
||||||
|
auto t_end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
std::cout << "fsgn_asm result: " << result << " in " <<
|
||||||
|
std::chrono::duration<double>(t_end - t_start).count() << '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
//fsgn
|
||||||
|
{
|
||||||
|
float result = 0.0f;
|
||||||
|
|
||||||
|
auto t_start = std::chrono::high_resolution_clock::now();
|
||||||
|
for (auto z = 0U; z < count; ++z) {
|
||||||
|
result += fsgn(inputs[z]);
|
||||||
|
}
|
||||||
|
auto t_end = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
std::cout << "fsgn result: " << result << " in " <<
|
||||||
|
std::chrono::duration<double>(t_end - t_start).count() << '\n';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
} //namespace curry
|
28
docs/old_code/fsgn_timing.hpp
Normal file
28
docs/old_code/fsgn_timing.hpp
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
/*
|
||||||
|
Copyright 2016, 2017 Michele "King_DuckZ" Santullo
|
||||||
|
|
||||||
|
This file is part of MyCurry.
|
||||||
|
|
||||||
|
MyCurry is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
MyCurry is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with MyCurry. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#define FSGN_WITH_TIMING
|
||||||
|
|
||||||
|
namespace curry {
|
||||||
|
#if defined(FSGN_WITH_TIMING)
|
||||||
|
// Times fast_fsgn, fsgn_asm and fsgn over the same set of pseudo-random
// floats and prints each accumulated result and elapsed time to std::cout.
// Only declared when FSGN_WITH_TIMING is defined.
void do_fsgn_timing();
|
||||||
|
#endif
|
||||||
|
} //namespace curry
|
Loading…
Reference in a new issue