From 9fff9d793c5ce65ac23255f40f9fab1074c4adc8 Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Wed, 24 May 2017 09:34:52 +0100 Subject: [PATCH] Move ip requesting code into a separate file and make hashing configurable at build time. New code tries to guess a per-visitor IP address and uses that (or its hash) to tell users apart. This patch also adds a TAWASHI_WITH_IP_LOGGING cmake option to enable or disable logging the IP address of your visitors in various places (just one right now but don't assume things to remain this way). Also added a couple new CGI environment variables. --- CMakeLists.txt | 2 + src/tawashiConfig.h.in | 1 + src/tawashi_implem/CMakeLists.txt | 1 + src/tawashi_implem/cgi_env.cpp | 8 ++ src/tawashi_implem/cgi_env.hpp | 2 + src/tawashi_implem/cgi_environment_vars.hpp | 2 + src/tawashi_implem/ip_utils.cpp | 83 ++++++++++++++++++++ src/tawashi_implem/ip_utils.hpp | 29 +++++++ src/tawashi_implem/submit_paste_response.cpp | 36 ++------- 9 files changed, 133 insertions(+), 31 deletions(-) create mode 100644 src/tawashi_implem/ip_utils.cpp create mode 100644 src/tawashi_implem/ip_utils.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index bb81ee0..249e023 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,8 @@ project(tawashi_top VERSION 0.1.11 LANGUAGES NONE) include(CTest) +option(TAWASHI_WITH_IP_LOGGING "Enable code in Tawashi that may result in users IPs being stored in the DB or in logs" ON) + set(INCREDIS_FORCE_DISABLE_TESTS ON) set(TAWASHI_SOURCE_ROOT "${CMAKE_CURRENT_SOURCE_DIR}") set(TAWASHI_GEN_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") diff --git a/src/tawashiConfig.h.in b/src/tawashiConfig.h.in index 43f9e5a..1956ee2 100644 --- a/src/tawashiConfig.h.in +++ b/src/tawashiConfig.h.in @@ -23,3 +23,4 @@ #define VERSION_MAJOR @PROJECT_VERSION_MAJOR@ #define VERSION_MINOR @PROJECT_VERSION_MINOR@ #define VERSION_PATCH @PROJECT_VERSION_PATCH@ +#cmakedefine TAWASHI_WITH_IP_LOGGING diff --git a/src/tawashi_implem/CMakeLists.txt 
b/src/tawashi_implem/CMakeLists.txt index 9ea67ea..074cd66 100644 --- a/src/tawashi_implem/CMakeLists.txt +++ b/src/tawashi_implem/CMakeLists.txt @@ -28,6 +28,7 @@ add_library(${PROJECT_NAME} STATIC tawashi_exception.cpp http_header.cpp quick_submit_paste_response.cpp + ip_utils.cpp ) target_include_directories(${PROJECT_NAME} diff --git a/src/tawashi_implem/cgi_env.cpp b/src/tawashi_implem/cgi_env.cpp index 284e60f..a601219 100644 --- a/src/tawashi_implem/cgi_env.cpp +++ b/src/tawashi_implem/cgi_env.cpp @@ -150,6 +150,14 @@ namespace cgi { return m_cgi_env[CGIVars::QUERY_STRING]; } + const std::string& Env::http_client_ip() const { + return m_cgi_env[CGIVars::HTTP_CLIENT_IP]; + } + + const std::string& Env::http_x_forwarded_for() const { + return m_cgi_env[CGIVars::HTTP_X_FORWARDED_FOR]; + } + const std::string& Env::remote_addr() const { return m_cgi_env[CGIVars::REMOTE_ADDR]; } diff --git a/src/tawashi_implem/cgi_env.hpp b/src/tawashi_implem/cgi_env.hpp index 9fda7c7..c41905c 100644 --- a/src/tawashi_implem/cgi_env.hpp +++ b/src/tawashi_implem/cgi_env.hpp @@ -52,6 +52,8 @@ namespace tawashi { boost::string_ref path_info() const; const std::string& path_translated() const; const std::string& query_string() const; + const std::string& http_client_ip() const; + const std::string& http_x_forwarded_for() const; const std::string& remote_addr() const; const std::string& remote_host() const; const std::string& remote_ident() const; diff --git a/src/tawashi_implem/cgi_environment_vars.hpp b/src/tawashi_implem/cgi_environment_vars.hpp index 6ee4124..fa5475e 100644 --- a/src/tawashi_implem/cgi_environment_vars.hpp +++ b/src/tawashi_implem/cgi_environment_vars.hpp @@ -28,9 +28,11 @@ namespace tawashi { CONTENT_TYPE, DOCUMENT_ROOT, //The root directory of your server GATEWAY_INTERFACE, + HTTP_CLIENT_IP, HTTP_COOKIE, //The visitor's cookie, if one is set HTTP_HOST, //The hostname of your server HTTP_REFERER, //The URL of the page that called your script + 
HTTP_X_FORWARDED_FOR, HTTPS, //"on" if the script is being called through a secure server HTTP_USER_AGENT, //The browser type of your visitor PATH, //The system path your server is running under diff --git a/src/tawashi_implem/ip_utils.cpp b/src/tawashi_implem/ip_utils.cpp new file mode 100644 index 0000000..9a0c9fd --- /dev/null +++ b/src/tawashi_implem/ip_utils.cpp @@ -0,0 +1,83 @@ +/* Copyright 2017, Michele Santullo + * This file is part of "tawashi". + * + * "tawashi" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "tawashi" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "tawashi". If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include "ip_utils.hpp" +#include "duckhandy/lexical_cast.hpp" +#include "duckhandy/int_to_string_ary.hpp" +#include "cgi_env.hpp" +#include "tawashiConfig.h" +#include +#include +#include +#include + +#if !defined(TAWASHI_WITH_IP_LOGGING) +extern "C" void tiger (const char* parStr, uint64_t parLength, uint64_t parHash[3], char parPadding); +#endif + +namespace tawashi { + namespace { + std::string hash_if_configured (const std::string& parIP) a_always_inline; + +#if !defined(TAWASHI_WITH_IP_LOGGING) + std::string hashed_ip (const std::string& parIP) { /* returns the 48-character, zero-padded Tiger digest of parIP */ + using dhandy::tags::hex; + + uint64_t hash[3]; + tiger(parIP.data(), parIP.size(), hash, 0x80); + + auto h1 = dhandy::int_to_string_ary(hash[0]); + auto h2 = dhandy::int_to_string_ary(hash[1]); + auto h3 = dhandy::int_to_string_ary(hash[2]); + + std::string retval(2 * sizeof(uint64_t) * 3, '0'); + assert(h1.size() <= 2 * sizeof(uint64_t)); + std::copy(h1.begin(), h1.end(), retval.begin() + 2 * sizeof(uint64_t) * 0 + 2 * sizeof(uint64_t) - h1.size()); + assert(h2.size() <= 2 * sizeof(uint64_t)); + std::copy(h2.begin(), h2.end(), retval.begin() + 2 * sizeof(uint64_t) * 1 + 2 * sizeof(uint64_t) - h2.size()); + assert(h3.size() <= 2 * sizeof(uint64_t)); + std::copy(h3.begin(), h3.end(), retval.begin() + 2 * sizeof(uint64_t) * 2 + 2 * sizeof(uint64_t) - h3.size()); + + SPDLOG_DEBUG(spdlog::get("statuslog"), "IP hashed -> \"{}\"", retval); /* the raw address is deliberately not logged: this function only exists when TAWASHI_WITH_IP_LOGGING is off */ + assert(retval.size() == 16 * 3); + return retval; + } +#endif + + inline std::string hash_if_configured (const std::string& parIP) { /* identity when IP logging is enabled, hashed_ip() otherwise */ +#if defined(TAWASHI_WITH_IP_LOGGING) + return parIP; +#else + return hashed_ip(parIP); +#endif + } + + } //unnamed namespace + + //see: https://stackoverflow.com/questions/18799808/how-do-i-count-unique-visitors-to-my-site + std::string guess_real_remote_ip (const cgi::Env& parCgiEnv) { /* first non-empty of HTTP_CLIENT_IP, HTTP_X_FORWARDED_FOR, REMOTE_ADDR, passed through hash_if_configured() */ + if (not parCgiEnv.http_client_ip().empty()) { + return hash_if_configured(parCgiEnv.http_client_ip()); + } + else if (not
parCgiEnv.http_x_forwarded_for().empty()) { + return hash_if_configured(parCgiEnv.http_x_forwarded_for()); + } + else { + return hash_if_configured(parCgiEnv.remote_addr()); + } + } +} //namespace tawashi diff --git a/src/tawashi_implem/ip_utils.hpp b/src/tawashi_implem/ip_utils.hpp new file mode 100644 index 0000000..548bf7e --- /dev/null +++ b/src/tawashi_implem/ip_utils.hpp @@ -0,0 +1,29 @@ +/* Copyright 2017, Michele Santullo + * This file is part of "tawashi". + * + * "tawashi" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "tawashi" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "tawashi". If not, see <http://www.gnu.org/licenses/>.
+ */ + +#pragma once + +#include "duckhandy/compatibility.h" +#include + +namespace tawashi { + namespace cgi { + class Env; + } //namespace cgi + + std::string guess_real_remote_ip (const cgi::Env& parCgiEnv) a_pure; +} //namespace tawashi diff --git a/src/tawashi_implem/submit_paste_response.cpp b/src/tawashi_implem/submit_paste_response.cpp index 8c9a13f..b70d87c 100644 --- a/src/tawashi_implem/submit_paste_response.cpp +++ b/src/tawashi_implem/submit_paste_response.cpp @@ -18,21 +18,18 @@ #include "submit_paste_response.hpp" #include "incredis/incredis.hpp" #include "cgi_post.hpp" -#include "cgi_env.hpp" #include "num_to_token.hpp" #include "settings_bag.hpp" #include "duckhandy/compatibility.h" #include "duckhandy/lexical_cast.hpp" -#include "duckhandy/int_to_string_ary.hpp" #include "tawashi_exception.hpp" +#include "ip_utils.hpp" #include #include #include #include #include -extern "C" void tiger (const char* parStr, uint64_t parLength, uint64_t parHash[3], char parPadding); - namespace tawashi { namespace { const char g_post_key[] = "pastie"; @@ -75,29 +72,6 @@ namespace tawashi { return boost::string_ref(); } } - - std::string hashed_ip (const std::string& parIP) { - using dhandy::tags::hex; - - uint64_t hash[3]; - tiger(parIP.data(), parIP.size(), hash, 0x80); - - auto h1 = dhandy::int_to_string_ary(hash[0]); - auto h2 = dhandy::int_to_string_ary(hash[1]); - auto h3 = dhandy::int_to_string_ary(hash[2]); - - std::string retval(2 * sizeof(uint64_t) * 3, '0'); - assert(h1.size() <= 2 * sizeof(uint64_t)); - std::copy(h1.begin(), h1.end(), retval.begin() + 2 * sizeof(uint64_t) * 0 + 2 * sizeof(uint64_t) - h1.size()); - assert(h2.size() <= 2 * sizeof(uint64_t)); - std::copy(h2.begin(), h2.end(), retval.begin() + 2 * sizeof(uint64_t) * 1 + 2 * sizeof(uint64_t) - h2.size()); - assert(h3.size() <= 2 * sizeof(uint64_t)); - std::copy(h3.begin(), h3.end(), retval.begin() + 2 * sizeof(uint64_t) * 2 + 2 * sizeof(uint64_t) - h3.size()); - - 
SPDLOG_DEBUG(spdlog::get("statuslog"), "IP \"{}\" hashed -> \"{}\"", parIP, retval); - assert(retval.size() == 16 * 3); - return retval; - } } //unnamed namespace SubmitPasteResponse::SubmitPasteResponse ( @@ -174,8 +148,8 @@ namespace tawashi { return std::make_pair(boost::optional(), make_error_redirect(ErrorReasons::RedisDisconnected)); } - std::string ip_hash = hashed_ip(cgi_env().remote_addr()); - if (redis.get(ip_hash)) { + std::string remote_ip = guess_real_remote_ip(cgi_env()); + if (redis.get(remote_ip)) { //please wait and submit again return std::make_pair(boost::optional(), make_error_redirect(ErrorReasons::UserFlooding)); } @@ -188,8 +162,8 @@ namespace tawashi { "max_ttl", dhandy::lexical_cast(parExpiry), "lang", parLang) ) { - redis.set(ip_hash, ""); - redis.expire(ip_hash, settings().as("resubmit_wait")); + redis.set(remote_ip, ""); + redis.expire(remote_ip, settings().as("resubmit_wait")); if (redis.expire(token, parExpiry)) return std::make_pair(boost::make_optional(token), HttpHeader()); }