Experimental code that turned out to be too slow and got scrapped.

This commit is contained in:
King_DuckZ 2017-02-08 16:37:12 +00:00
parent cb8654ac7e
commit 24d67c7cd6
4 changed files with 185 additions and 0 deletions

View file

@ -0,0 +1,26 @@
EFLAGS bit meaning
http://reverseengineering.stackexchange.com/a/9222
GCC inline assembly
http://ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html
Popping the FPU stack
http://stackoverflow.com/a/33575875/768582
Decoding a sample snippet of asm
http://stackoverflow.com/questions/9186150/decoding-and-understanding-assembly-code
GNU assembler examples
http://cs.lmu.edu/~ray/notes/gasexamples/
GCC function attributes (see "naked")
https://gcc.gnu.org/onlinedocs/gcc-4.3.5/gcc/Function-Attributes.html
Fast floating point sign
http://stackoverflow.com/a/2508911/768582
AMD assembly reference
http://developer.amd.com/wordpress/media/2012/10/26569_APM_v51.pdf
Moving values between SSE and FPU
http://stackoverflow.com/questions/37567154/intel-x86-64-assembly-how-to-move-between-x87-and-sse2-calculating-arctangent#37573264

View file

@ -0,0 +1,48 @@
#
# Copyright 2016, 2017 Michele "King_DuckZ" Santullo
# This file is part of MyCurry.
# MyCurry is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# MyCurry is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with MyCurry. If not, see <http://www.gnu.org/licenses/>.
#
# Single-precision constant -1.0; pushed onto the x87 stack by fsgn_asm below.
.data
minus1: .float -1.0
.global fsgn_asm
.text
# fsgn_asm: sign of a single-precision float, computed on the x87 FPU.
# The input is read from %xmm0 and the result is written back to %xmm0:
#   +1.0 when the input compares greater than zero,
#    0.0 when the compare sets ZF (input equal to zero),
#   -1.0 otherwise.
# NOTE(review): an unordered compare (NaN input) presumably also sets ZF and
# would therefore yield 0.0 - confirm this is the intended behaviour.
fsgn_asm:
# Reserve 24 bytes of stack; (%rsp) is the slot used to pass the value
# between the SSE register and the x87 stack in both directions.
subq $24,%rsp
movss %xmm0,(%rsp)
# st0 = 0.0
fldz
# st0 = input, st1 = 0.0
flds (%rsp)
# Compare st0 (input) with st1 (0.0), then pop the input: st0 = 0.0
fcomp
# Copy the FPU status word into %ax and from %ah into the CPU flags, so the
# fcmov instructions below can test the outcome of the compare.
fstsw %ax
sahf
# st0 = 1.0, st1 = 0.0
fld1
# st0 = -1.0 (the result unless one of the moves below fires), st1 = 1.0, st2 = 0.0
# NOTE(review): minus1 is referenced by absolute address; a position-independent
# build would presumably need minus1(%rip) - confirm against the build flags.
fld minus1
# Not below-or-equal (input > 0): st0 = st1 = 1.0
fcmovnbe %st(1),%st
# Equal (ZF set): st0 = st2 = 0.0
fcmove %st(2),%st
# Store the selected value as a float into the stack slot and pop it.
fstps (%rsp)
# Reinitialise the FPU, discarding the two values still on the x87 stack.
# NOTE(review): fninit also resets the x87 control word to its default;
# confirm no caller relies on a non-default precision/rounding setting.
fninit
# Hand the result back in %xmm0.
movss (%rsp),%xmm0
#xorpd %xmm0,%xmm0
# Release the scratch space and return.
addq $24,%rsp
ret

View file

@ -0,0 +1,83 @@
/*
Copyright 2016, 2017 Michele "King_DuckZ" Santullo
This file is part of MyCurry.
MyCurry is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
MyCurry is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with MyCurry. If not, see <http://www.gnu.org/licenses/>.
*/
#include "fsgn_timing.hpp"
#include "fsgn.hpp"
#if defined(FSGN_WITH_TIMING)
# include <chrono>
# include <random>
# include <vector>
# include <algorithm>
# include <iostream>
#endif
namespace curry {
#if defined(FSGN_WITH_TIMING)
namespace {
	///Calls `func` once for every element of `inputs`, in order, adding each
	///returned value to a float accumulator, and prints
	///"<label> result: <sum> in <elapsed seconds>" to std::cout.
	///\param label name printed in front of the measurement
	///\param inputs values passed to `func` one at a time
	///\param func callable taking a float; it is a template parameter so that
	///       a lambda wrapping the measured function can still be inlined
	template <typename F>
	void time_fsgn_variant (const char* label, const std::vector<float>& inputs, F func) {
		float result = 0.0f;
		//steady_clock is monotonic; high_resolution_clock may be an alias of
		//the system clock, which can jump while the loop is being measured
		const auto t_start = std::chrono::steady_clock::now();
		for (const float value : inputs) {
			result += func(value);
		}
		const auto t_end = std::chrono::steady_clock::now();

		std::cout << label << " result: " << result << " in " <<
			std::chrono::duration<double>(t_end - t_start).count() << '\n';
	}
} //unnamed namespace

///Times fast_fsgn, fsgn_asm and fsgn over the same 1000000 pseudo-random
///floats and prints one line per function to std::cout with the accumulated
///result and the elapsed time in seconds.
///The generator is default-constructed, so every run uses the same inputs.
void do_fsgn_timing() {
	constexpr const auto count = 1000000U;

	std::minstd_rand rng;
	std::vector<float> inputs(count);
	//Shift the generator output down by half its maximum so that the inputs
	//are spread over both negative and positive values
	std::generate(inputs.begin(), inputs.end(), [&rng]() {
		return static_cast<float>(rng()) - static_cast<float>(rng.max()) / 2.0f;
	});

	time_fsgn_variant("fast_fsgn", inputs, [](float v) { return fast_fsgn(v); });
	time_fsgn_variant("fsgn_asm", inputs, [](float v) { return fsgn_asm(v); });
	time_fsgn_variant("fsgn", inputs, [](float v) { return fsgn(v); });
}
#endif
} //namespace curry

View file

@ -0,0 +1,28 @@
/*
Copyright 2016, 2017 Michele "King_DuckZ" Santullo
This file is part of MyCurry.
MyCurry is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
MyCurry is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with MyCurry. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
//Defined unconditionally here: everything guarded by FSGN_WITH_TIMING is
//compiled in. Remove or comment out this line to compile it out.
#define FSGN_WITH_TIMING
namespace curry {
#if defined(FSGN_WITH_TIMING)
	//Only declared when FSGN_WITH_TIMING is defined.
	//Takes no arguments and returns nothing.
void do_fsgn_timing();
#endif
} //namespace curry