Removing version 1_0 directory

git-svn-id: http://svn.code.sf.net/p/utfcpp/code@139 a809a056-fc17-0410-9590-b4f493f8b08e
2013-02-18 00:05:43 +00:00 · 2013-02-18 00:05:43 +00:00 · fa73898a3d
commit fa73898a3d
parent 62b7d7ae0c
29 changed files with 0 additions and 423459 deletions
--- a/v1_0/buildrelease.pl
+++ b/v1_0/buildrelease.pl
@ -1,18 +0,0 @@
-#! /usr/bin/perl
-
-# Builds a release archive: refreshes the working copy from Subversion and
-# then zips the release files. The optional first command-line argument names
-# the archive (default: "utf8").
-
-use strict;
-use warnings;
-
-my @release_files = ('source/utf8.h', 'doc/utf8cpp.html', 'doc/ReleaseNotes');
-
-# First get the latest version. Stop if the update fails so that a stale
-# working copy is never packaged.
-system('svn', 'update') == 0
-    or die "svn update failed (status $?)\n";
-
-# Then construct the name of the zip file
-my $zip_name = @ARGV > 0 ? $ARGV[0] : 'utf8';
-
-# Zip the files to an archive. The list form of system() does not go through
-# the shell, so an archive name with spaces or shell metacharacters is passed
-# to zip unchanged instead of being interpreted.
-system('zip', $zip_name, @release_files) == 0
-    or die "zip failed (status $?)\n";
--- a/v1_0/doc/ReleaseNotes
+++ b/v1_0/doc/ReleaseNotes
@ -1,11 +0,0 @@
-utf8 cpp library
-Release 1.02 
-
-utf8::previous is deprecated in favor of utf8::prior
-
-Bug Fixes: 
-[ 1599048 ] utf8::previous doesn't play well with Standard containers
-
-
-Files: utf8.h utf8cpp.html ReleaseNotes
-
--- a/v1_0/doc/utf8cpp.html
+++ b/v1_0/doc/utf8cpp.html
--- a/v1_0/samples/Makefile
+++ b/v1_0/samples/Makefile
@ -1,5 +0,0 @@
-# Compiler and flags used to build the documentation sample.
-CC = g++
-CFLAGS = -g -Wall -pedantic
-
-# Rebuild the sample whenever its source or the library header changes.
-docsample: docsample.cpp ../source/utf8.h
-	$(CC) $(CFLAGS) docsample.cpp -odocsample
--- a/v1_0/samples/docsample.cpp
+++ b/v1_0/samples/docsample.cpp
@ -1,63 +0,0 @@
-#include "../source/utf8.h"
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-
-
-using namespace std;
-
-// Sample driver for the library. Reads the text file named on the command
-// line one line at a time, reports a leading byte order mark, validates each
-// line, prints its length in code points and round-trips the valid part
-// through UTF-16.
-// Usage: docsample filename
-int main(int argc, char** argv)
-{
-    if (argc != 2) {
-        cout << "\nUsage: docsample filename\n";
-        return 0;
-    }
-    const char* test_file_path = argv[1];
-    // Open the test file (must be UTF-8 encoded)
-    ifstream fs8(test_file_path);
-    if (!fs8.is_open()) {
-        cout << "Could not open " << test_file_path << endl;
-        return 0;
-    }
-
-    // Read the first line of the file
-    unsigned line_count = 1;
-    string line;
-    if (!getline(fs8, line))
-        return 0;
-
-    // Look for utf-8 byte-order mark at the beginning
-    // (is_bom reads three octets, hence the size check)
-    if (line.size() > 2) {
-        if (utf8::is_bom(line.c_str()))
-            cout << "There is a byte order mark at the beginning of the file\n";
-    }
-
-    // Play with all the lines in the file
-    do {
-        // check for invalid utf-8 (for a simple yes/no check, there is also utf8::is_valid function)
-        string::iterator end_it = utf8::find_invalid(line.begin(), line.end());
-        if (end_it != line.end()) {
-            cout << "Invalid UTF-8 encoding detected at line " << line_count << "\n";
-            cout << "This part is fine: " << string(line.begin(), end_it) << "\n";
-        }
-        // Get the line length (at least for the valid part)
-        int length = utf8::distance(line.begin(), end_it);
-        cout << "Length of line " << line_count << " is " << length <<  "\n";
-
-        // Convert it to utf-16
-        vector<unsigned short> utf16line;
-        utf8::utf8to16(line.begin(), end_it, back_inserter(utf16line));
-        // And back to utf-8;
-        string utf8line;
-        utf8::utf16to8(utf16line.begin(), utf16line.end(), back_inserter(utf8line));
-        // Confirm that the conversion went OK:
-        if (utf8line != string(line.begin(), end_it))
-            cout << "Error in UTF-16 conversion at line: " << line_count << "\n";
-
-        line_count++;
-        // The loop is driven by the result of the read itself. Testing eof()
-        // after the read would skip a last line that has no trailing newline
-        // and would never terminate on a read error other than end-of-file.
-    } while (getline(fs8, line));
-
-    return 0;
-}
--- a/v1_0/source/utf8.h
+++ b/v1_0/source/utf8.h
@ -1,569 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include <iterator>
-#include <exception>
-
-namespace utf8
-{
-    // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers.
-    // You may need to change them to match your system.
-    // These typedefs have the same names as the ones from cstdint or boost/cstdint,
-    // but they are declared here inside namespace utf8.
-    typedef unsigned char   uint8_t;
-    typedef unsigned short  uint16_t;
-    typedef unsigned int    uint32_t;
-    
-    // Exceptions that may be thrown from the library functions.
-
-    // Signals a value that is not accepted as a code point; the offending
-    // value is available through code_point().
-    class invalid_code_point : public std::exception {
-    public:
-        invalid_code_point(uint32_t cp) : cp_(cp) {}
-        virtual const char* what() const throw() { return "Invalid code point"; }
-        uint32_t code_point() const { return cp_; }
-    private:
-        uint32_t cp_;   // value handed to the constructor
-    };
-
-    // Signals malformed UTF-8 input; the octet handed to the constructor is
-    // available through utf8_octet().
-    class invalid_utf8 : public std::exception {
-    public:
-        invalid_utf8 (uint8_t u) : octet_(u) {}
-        virtual const char* what() const throw() { return "Invalid UTF-8"; }
-        uint8_t utf8_octet() const { return octet_; }
-    private:
-        uint8_t octet_;
-    };
-
-    // Signals malformed UTF-16 input; the 16-bit unit handed to the
-    // constructor is available through utf16_word().
-    class invalid_utf16 : public std::exception {
-    public:
-        invalid_utf16 (uint16_t u) : word_(u) {}
-        virtual const char* what() const throw() { return "Invalid UTF-16"; }
-        uint16_t utf16_word() const { return word_; }
-    private:
-        uint16_t word_;
-    };
-
-    // Signals that the input range ended before a complete sequence could be
-    // read. Carries no payload.
-    class not_enough_room : public std::exception {
-    public:
-        virtual const char* what() const throw() { return "Not enough space"; }
-    };
-
-
-
-// Helper code - not intended to be directly called by the library users. May be changed at any time
-namespace internal
-{    
-    // Unicode constants
-    // Leading (high) surrogates: 0xd800 - 0xdbff
-    // Trailing (low) surrogates: 0xdc00 - 0xdfff
-    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
-    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
-    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
-    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
-    // Added to (code point >> 10) to obtain the lead surrogate of a pair
-    const uint16_t LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10);
-    // Added to (lead << 10) + trail to recover the code point of a pair
-    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
-
-    // Maximum valid value for a Unicode code point
-    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;
-
-    // Keeps only the low 8 bits of the argument and returns them as an
-    // unsigned octet, whatever the (possibly signed) input type is.
-    template<typename octet_type>
-    inline uint8_t mask8(octet_type oc)
-    {
-        const uint8_t low_octet = static_cast<uint8_t>(0xff & oc);
-        return low_octet;
-    }
-    // Keeps only the low 16 bits of the argument and returns them as an
-    // unsigned 16-bit value.
-    template<typename u16_type>
-    inline uint16_t mask16(u16_type oc)
-    {
-        const uint16_t low_word = static_cast<uint16_t>(0xffff & oc);
-        return low_word;
-    }
-    // True when the octet has the bit pattern 10xxxxxx, i.e. it is a
-    // continuation (trail) octet of a multi-octet sequence.
-    template<typename octet_type>
-    inline bool is_trail(octet_type oc)
-    {
-        return (mask8(oc) & 0xc0) == 0x80;
-    }
-
-    // True when cp lies in the surrogate range, lead or trail
-    // (LEAD_SURROGATE_MIN through TRAIL_SURROGATE_MAX inclusive).
-    template <typename u16>
-    inline bool is_surrogate(u16 cp)
-    {
-        return !(cp < LEAD_SURROGATE_MIN || cp > TRAIL_SURROGATE_MAX);
-    }
-
-    // True when cp is accepted by the library: not above CODE_POINT_MAX, not
-    // a surrogate, and neither 0xfffe nor 0xffff.
-    template <typename u32>
-    inline bool is_code_point_valid(u32 cp)
-    {
-        if (!(cp <= CODE_POINT_MAX))
-            return false;
-        if (is_surrogate(cp))
-            return false;
-        return cp != 0xfffe && cp != 0xffff;
-    }
-
-    // Returns the number of octets (1 to 4) announced by the lead octet at
-    // lead_it, or 0 when that octet is not a recognised lead octet.
-    template <typename octet_iterator>
-    inline typename std::iterator_traits<octet_iterator>::difference_type
-    sequence_length(octet_iterator lead_it)
-    {
-        const uint8_t first = mask8(*lead_it);
-        if (first < 0x80)               // 0xxxxxxx
-            return 1;
-        if ((first & 0xe0) == 0xc0)     // 110xxxxx
-            return 2;
-        if ((first & 0xf0) == 0xe0)     // 1110xxxx
-            return 3;
-        if ((first & 0xf8) == 0xf0)     // 11110xxx
-            return 4;
-        return 0;
-    }
-
-    // Result codes produced by validate_next.
-    enum utf_error {OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
-
-    // Decodes and validates the UTF-8 sequence that starts at it.
-    //   it         - in/out: on OK it is advanced past the sequence; on any
-    //                error it is left pointing at the lead octet.
-    //   end        - one past the last readable octet.
-    //   code_point - optional out: receives the decoded value on OK (it is
-    //                also written before an OVERLONG_SEQUENCE is reported).
-    // Returns OK or the reason the sequence was rejected.
-    // The iterator type must support subtraction (end - it) and decrement.
-    template <typename octet_iterator>
-    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t* code_point = 0)
-    {
-        // An empty range has no lead octet: reading *it would run past the
-        // end, so report the shortage before touching the input.
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        uint32_t cp = mask8(*it);
-        // Check the lead octet
-        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
-        octet_difference_type length = sequence_length(it);
-
-        // "Shortcut" for ASCII characters
-        if (length == 1) {
-            if (end - it > 0) {
-                if (code_point)
-                    *code_point = cp;
-                ++it;
-                return OK;
-            }
-            else
-                return NOT_ENOUGH_ROOM;
-        }
-
-        // Do we have enough memory?
-        if (end - it < length)
-            return NOT_ENOUGH_ROOM;
-
-        // Check trail octets and calculate the code point.
-        // Every failure path steps it back to the lead octet before returning.
-        switch (length) {
-            case 0:
-                return INVALID_LEAD;
-                break;
-            case 2:
-                if (is_trail(*(++it))) {
-                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
-                }
-                else {
-                    --it;
-                    return INCOMPLETE_SEQUENCE;
-                }
-            break;
-            case 3:
-                if (is_trail(*(++it))) {
-                    cp = ((cp << 12) & 0xffff) + ((mask8(*it) << 6) & 0xfff);
-                    if (is_trail(*(++it))) {
-                        cp += (*it) & 0x3f;
-                    }
-                    else {
-                        --it; --it;
-                        return INCOMPLETE_SEQUENCE;
-                    }
-                }
-                else {
-                    --it;
-                    return INCOMPLETE_SEQUENCE;
-                }
-            break;
-            case 4:
-                if (is_trail(*(++it))) {
-                    cp = ((cp << 18) & 0x1fffff) + ((mask8(*it) << 12) & 0x3ffff);
-                    if (is_trail(*(++it))) {
-                        cp += (mask8(*it) << 6) & 0xfff;
-                        if (is_trail(*(++it))) {
-                            cp += (*it) & 0x3f;
-                        }
-                        else {
-                            --it; --it; --it;
-                            return INCOMPLETE_SEQUENCE;
-                        }
-                    }
-                    else {
-                        --it; --it;
-                        return INCOMPLETE_SEQUENCE;
-                    }
-                }
-                else {
-                    --it;
-                    return INCOMPLETE_SEQUENCE;
-                }
-            break;
-        }
-        // Is the code point valid?
-        // (it is on the last octet of the sequence here, hence length - 1 steps back)
-        if (!is_code_point_valid(cp)) {
-            for (octet_difference_type i = 0; i < length - 1; ++i)
-                --it;
-            return INVALID_CODE_POINT;
-        }
-
-        if (code_point)
-            *code_point = cp;
-
-        // Reject encodings that use more octets than the value requires
-        if (cp < 0x80) {
-            if (length != 1) {
-                for (octet_difference_type i = 0; i < length - 1; ++i)
-                    --it;
-                return OVERLONG_SEQUENCE;
-            }
-        }
-        else if (cp < 0x800) {
-            if (length != 2) {
-                for (octet_difference_type i = 0; i < length - 1; ++i)
-                    --it;
-                return OVERLONG_SEQUENCE;
-            }
-        }
-        else if (cp < 0x10000) {
-            if (length != 3) {
-                for (octet_difference_type i = 0; i < length - 1; ++i)
-                    --it;
-                return OVERLONG_SEQUENCE;
-            }
-        }
-
-        // Step past the last octet of the sequence
-        ++it;
-        return OK;
-    }
-
-} // namespace internal 
-    
-    /// The library API - functions intended to be called by the users
- 
-    // Byte order mark: the three octets that is_bom compares against
-    const uint8_t bom[] = {0xef, 0xbb, 0xbf}; 
-
-    // Scans [start, end) and returns an iterator to the first sequence that
-    // fails validation, or end when the whole range validates.
-    template <typename octet_iterator>
-    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
-    {
-        octet_iterator pos = start;
-        // validate_next advances pos on success and leaves it on the
-        // offending lead octet on failure.
-        while (pos != end && internal::validate_next(pos, end) == internal::OK)
-            ;
-        return pos;
-    }
-
-    // True when every sequence in [start, end) passes validation.
-    template <typename octet_iterator>
-    bool is_valid(octet_iterator start, octet_iterator end)
-    {
-        const octet_iterator first_bad = find_invalid(start, end);
-        return first_bad == end;
-    }
-
-    // True when the octets at it match the byte order mark. Reads up to three
-    // octets and takes no end iterator, so the caller must make sure that
-    // many are readable.
-    template <typename octet_iterator>
-    bool is_bom (octet_iterator it)
-    {
-        if (internal::mask8(*it++) != bom[0])
-            return false;
-        if (internal::mask8(*it++) != bom[1])
-            return false;
-        return internal::mask8(*it) == bom[2];
-    }
-    // Writes the UTF-8 encoding of cp through result and returns the advanced
-    // output iterator. Throws invalid_code_point when cp is not accepted by
-    // internal::is_code_point_valid.
-    template <typename octet_iterator>
-    octet_iterator append(uint32_t cp, octet_iterator result)
-    {
-        if (!internal::is_code_point_valid(cp))
-            throw invalid_code_point(cp);
-
-        if (cp < 0x80) {                                // one octet
-            *(result++) = static_cast<uint8_t>(cp);
-        }
-        else if (cp < 0x800) {                          // two octets
-            *(result++) = static_cast<uint8_t>(0xc0 | (cp >> 6));
-            *(result++) = static_cast<uint8_t>(0x80 | (cp & 0x3f));
-        }
-        else if (cp < 0x10000) {                        // three octets
-            *(result++) = static_cast<uint8_t>(0xe0 | (cp >> 12));
-            *(result++) = static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3f));
-            *(result++) = static_cast<uint8_t>(0x80 | (cp & 0x3f));
-        }
-        else if (cp <= internal::CODE_POINT_MAX) {      // four octets
-            *(result++) = static_cast<uint8_t>(0xf0 | (cp >> 18));
-            *(result++) = static_cast<uint8_t>(0x80 | ((cp >> 12) & 0x3f));
-            *(result++) = static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3f));
-            *(result++) = static_cast<uint8_t>(0x80 | (cp & 0x3f));
-        }
-        else {
-            throw invalid_code_point(cp);
-        }
-
-        return result;
-    }
-
-    // Decodes the sequence at it, advances it past the sequence and returns
-    // the code point. Validation failures reported by
-    // internal::validate_next are turned into exceptions:
-    //   NOT_ENOUGH_ROOM                                        -> not_enough_room
-    //   INVALID_LEAD / INCOMPLETE_SEQUENCE / OVERLONG_SEQUENCE -> invalid_utf8
-    //   INVALID_CODE_POINT                                     -> invalid_code_point
-    template <typename octet_iterator>
-    uint32_t next(octet_iterator& it, octet_iterator end)
-    {
-        uint32_t decoded = 0;
-        const internal::utf_error status = internal::validate_next(it, end, &decoded);
-
-        if (status == internal::NOT_ENOUGH_ROOM)
-            throw not_enough_room();
-
-        if (status == internal::INVALID_LEAD ||
-            status == internal::INCOMPLETE_SEQUENCE ||
-            status == internal::OVERLONG_SEQUENCE)
-            throw invalid_utf8(*it);
-
-        if (status == internal::INVALID_CODE_POINT)
-            throw invalid_code_point(decoded);
-
-        return decoded;
-    }
-
-    // Moves it back to the lead octet of the sequence that ends just before
-    // it and returns that sequence's code point.
-    //   it    - in/out: must point one past the sequence to decode.
-    //   start - first octet of the range; never read before this position.
-    // Throws not_enough_room when it == start (nothing precedes it),
-    // invalid_utf8 when only trail octets are found back to start, and
-    // whatever next() throws for a malformed sequence.
-    template <typename octet_iterator>
-    uint32_t prior(octet_iterator& it, octet_iterator start)
-    {
-        // Check before stepping back: decrementing first would read the
-        // octet in front of the range.
-        if (it == start)
-            throw not_enough_room();
-
-        octet_iterator end = it;
-        // Walk back over trail octets; stop at start rather than passing it
-        while (internal::is_trail(*(--it)))
-            if (it == start)
-                throw invalid_utf8(*it); // error - no lead byte in the sequence
-        octet_iterator temp = it;
-        return next(temp, end);
-    }
-
-    /// Deprecated in versions that include "prior"
-    // Same job as prior, but the second argument is compared for equality
-    // only after the octet at that position has already been read.
-    template <typename octet_iterator>
-    uint32_t previous(octet_iterator& it, octet_iterator pass_start)
-    {
-        octet_iterator end = it;
-        --it;
-        while (internal::is_trail(*it)) {
-            if (it == pass_start)
-                throw invalid_utf8(*it); // error - no lead byte in the sequence
-            --it;
-        }
-        octet_iterator lead = it;
-        return next(lead, end);
-    }
-
-    // Advances it by n code points, validating each one through next();
-    // propagates whatever next() throws.
-    template <typename octet_iterator, typename distance_type>
-    void advance (octet_iterator& it, distance_type n, octet_iterator end)
-    {
-        distance_type done = 0;
-        while (done < n) {
-            next(it, end);
-            ++done;
-        }
-    }
-
-    // Counts the code points in [first, last), validating each one through
-    // next(); propagates whatever next() throws.
-    template <typename octet_iterator>
-    typename std::iterator_traits<octet_iterator>::difference_type
-    distance (octet_iterator first, octet_iterator last)
-    {
-        typename std::iterator_traits<octet_iterator>::difference_type count = 0;
-        while (first < last) {
-            next(first, last);
-            ++count;
-        }
-        return count;
-    }
-
-    // Converts the UTF-16 range [start, end) to UTF-8, writing through result,
-    // and returns the advanced output iterator.
-    // Throws invalid_utf16 for a lead surrogate that is not followed by a
-    // trail surrogate (including one at the very end of the input) and for a
-    // trail surrogate with no lead in front of it; append() may throw
-    // invalid_code_point.
-    template <typename u16bit_iterator, typename octet_iterator>
-    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
-    {
-        while (start != end) {
-            uint32_t cp = internal::mask16(*start++);
-            // Take care of surrogate pairs first
-            if (cp >= internal::LEAD_SURROGATE_MIN && cp <= internal::LEAD_SURROGATE_MAX) {
-                // Input ended after the lead surrogate. Report the lead
-                // itself: start == end here and must not be dereferenced.
-                if (start == end)
-                    throw invalid_utf16(static_cast<uint16_t>(cp));
-
-                uint32_t trail_surrogate = internal::mask16(*start++);
-                if (trail_surrogate >= internal::TRAIL_SURROGATE_MIN && trail_surrogate <= internal::TRAIL_SURROGATE_MAX)
-                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
-                else
-                    throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
-            }
-            // A trail surrogate cannot start a pair
-            else if (cp >= internal::TRAIL_SURROGATE_MIN && cp <= internal::TRAIL_SURROGATE_MAX)
-                throw invalid_utf16(static_cast<uint16_t>(cp));
-
-            result = append(cp, result);
-        }
-        return result;
-    }
-
-    // Converts the UTF-8 range [start, end) to UTF-16, writing through result,
-    // and returns the advanced output iterator. Code points above 0xffff are
-    // written as a surrogate pair. Propagates whatever next() throws.
-    template <typename u16bit_iterator, typename octet_iterator>
-    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
-    {
-        while (start != end) {
-            const uint32_t code_point = next(start, end);
-            if (!(code_point > 0xffff)) {
-                *result++ = static_cast<uint16_t>(code_point);
-                continue;
-            }
-            // make a surrogate pair
-            *result++ = static_cast<uint16_t>((code_point >> 10)   + internal::LEAD_OFFSET);
-            *result++ = static_cast<uint16_t>((code_point & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
-        }
-        return result;
-    }
-
-    // Encodes every code point in [start, end) as UTF-8 through result and
-    // returns the advanced output iterator. Propagates whatever append() throws.
-    template <typename octet_iterator, typename u32bit_iterator>
-    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
-    {
-        for (; start != end; ++start)
-            result = append(*start, result);
-
-        return result;
-    }
-
-    // Decodes the UTF-8 range [start, end) and writes each code point through
-    // result; returns the advanced output iterator. Propagates whatever
-    // next() throws.
-    template <typename octet_iterator, typename u32bit_iterator>
-    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
-    {
-        while (start < end) {
-            const uint32_t code_point = next(start, end);
-            *result++ = code_point;
-        }
-
-        return result;
-    }
-
-    namespace unchecked 
-    {
-        // Writes the UTF-8 encoding of cp through result and returns the
-        // advanced output iterator. No validity check is made on cp: any
-        // value of 0x10000 or above is written as four octets.
-        template <typename octet_iterator>
-        octet_iterator append(uint32_t cp, octet_iterator result)
-        {
-            if (cp < 0x80) {                      // one octet
-                *(result++) = static_cast<uint8_t>(cp);
-            }
-            else if (cp < 0x800) {                // two octets
-                *(result++) = static_cast<uint8_t>(0xc0 | (cp >> 6));
-                *(result++) = static_cast<uint8_t>(0x80 | (cp & 0x3f));
-            }
-            else if (cp < 0x10000) {              // three octets
-                *(result++) = static_cast<uint8_t>(0xe0 | (cp >> 12));
-                *(result++) = static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3f));
-                *(result++) = static_cast<uint8_t>(0x80 | (cp & 0x3f));
-            }
-            else {                                // four octets
-                *(result++) = static_cast<uint8_t>(0xf0 | (cp >> 18));
-                *(result++) = static_cast<uint8_t>(0x80 | ((cp >> 12) & 0x3f));
-                *(result++) = static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3f));
-                *(result++) = static_cast<uint8_t>(0x80 | (cp & 0x3f));
-            }
-            return result;
-        }
-        // Decodes the sequence at it without any validation, advances it past
-        // the sequence and returns the code point. When the lead octet is not
-        // recognised (sequence length 0) the octet itself is returned and it
-        // moves forward by one.
-        template <typename octet_iterator>
-        uint32_t next(octet_iterator& it)
-        {
-            uint32_t value = internal::mask8(*it);
-            const typename std::iterator_traits<octet_iterator>::difference_type octets =
-                utf8::internal::sequence_length(it);
-
-            if (octets == 2) {
-                it++;
-                value = ((value << 6) & 0x7ff) + ((*it) & 0x3f);
-            }
-            else if (octets == 3) {
-                ++it;
-                value = ((value << 12) & 0xffff) + ((internal::mask8(*it) << 6) & 0xfff);
-                ++it;
-                value += (*it) & 0x3f;
-            }
-            else if (octets == 4) {
-                ++it;
-                value = ((value << 18) & 0x1fffff) + ((internal::mask8(*it) << 12) & 0x3ffff);
-                ++it;
-                value += (internal::mask8(*it) << 6) & 0xfff;
-                ++it;
-                value += (*it) & 0x3f;
-            }
-            // octets == 1 (and 0): the lead octet is the value
-
-            ++it;   // step past the last octet of the sequence
-            return value;
-        }
-
-        // Moves it back to the lead octet of the preceding sequence and
-        // returns that sequence's code point. No bounds or validity checks:
-        // the caller guarantees a lead octet exists before it.
-        template <typename octet_iterator>
-        uint32_t prior(octet_iterator& it)
-        {
-            while (internal::is_trail(*(--it))) ;
-            octet_iterator temp = it;
-            // Qualified call: an unqualified next(temp) can also find
-            // std::next through argument-dependent lookup for standard
-            // library iterators, which makes the call ambiguous.
-            return utf8::unchecked::next(temp);
-        }
-
-        // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
-        // Forwards to prior(it) and returns its result unchanged.
-        template <typename octet_iterator>
-        inline uint32_t previous(octet_iterator& it)
-        {
-            const uint32_t code_point = prior(it);
-            return code_point;
-        }
-        
-        // Advances it by n code points without validating the input.
-        template <typename octet_iterator, typename distance_type>
-        void advance (octet_iterator& it, distance_type n)
-        {
-            // next is qualified so that std::next, reachable through
-            // argument-dependent lookup for standard library iterators,
-            // cannot make the call ambiguous.
-            for (distance_type i = 0; i < n; ++i)
-                utf8::unchecked::next(it);
-        }
-
-        // Counts the code points in [first, last) without validating the input.
-        template <typename octet_iterator>
-        typename std::iterator_traits<octet_iterator>::difference_type
-        distance (octet_iterator first, octet_iterator last)
-        {
-            typename std::iterator_traits<octet_iterator>::difference_type dist;
-            // next is qualified so that std::next, reachable through
-            // argument-dependent lookup for standard library iterators,
-            // cannot make the call ambiguous.
-            for (dist = 0; first < last; ++dist)
-                utf8::unchecked::next(first);
-            return dist;
-        }
-
-        // Converts the UTF-16 range [start, end) to UTF-8 through result and
-        // returns the advanced output iterator. No validation: any surrogate
-        // is assumed to be followed by its partner, which is read
-        // unconditionally.
-        template <typename u16bit_iterator, typename octet_iterator>
-        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
-        {
-            while (start != end) {
-                uint32_t code_point = internal::mask16(*start++);
-                // Take care of surrogate pairs first
-                if (internal::is_surrogate(code_point)) {
-                    const uint32_t second_unit = internal::mask16(*start++);
-                    code_point = (code_point << 10) + second_unit + internal::SURROGATE_OFFSET;
-                }
-                result = append(code_point, result);
-            }
-            return result;
-        }
-
-        // Converts the UTF-8 range [start, end) to UTF-16 through result and
-        // returns the advanced output iterator. The input is not validated.
-        // Code points above 0xffff are written as a surrogate pair.
-        template <typename u16bit_iterator, typename octet_iterator>
-        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
-        {
-            while (start != end) {
-                // next is qualified so that std::next, reachable through
-                // argument-dependent lookup for standard library iterators,
-                // cannot make the call ambiguous.
-                uint32_t cp = utf8::unchecked::next(start);
-                if (cp > 0xffff) { //make a surrogate pair
-                    *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
-                    *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
-                }
-                else
-                    *result++ = static_cast<uint16_t>(cp);
-            }
-            return result;
-        }
-
-        // Encodes every value in [start, end) as UTF-8 through result, with
-        // no validity checks, and returns the advanced output iterator.
-        template <typename octet_iterator, typename u32bit_iterator>
-        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
-        {
-            for (; start != end; ++start)
-                result = append(*start, result);
-
-            return result;
-        }
-
-        // Decodes the UTF-8 range [start, end) without validation and writes
-        // each code point through result; returns the advanced output iterator.
-        template <typename octet_iterator, typename u32bit_iterator>
-        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
-        {
-            // next is qualified so that std::next, reachable through
-            // argument-dependent lookup for standard library iterators,
-            // cannot make the call ambiguous.
-            while (start < end)
-                (*result++) = utf8::unchecked::next(start);
-
-            return result;
-        }
-
-    } // namespace utf8::unchecked
-} // namespace utf8 
-
-#endif // header guard
--- a/v1_0/test_data/negative/utf8_invalid.txt
+++ b/v1_0/test_data/negative/utf8_invalid.txt
--- a/v1_0/test_data/utf8samples/UTF-8-demo.txt
+++ b/v1_0/test_data/utf8samples/UTF-8-demo.txt
@ -1,212 +0,0 @@
-
-UTF-8 encoded sample plain-text file
-‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
-
-Markus Kuhn [ˈmaʳkʊs kuːn] <http://www.cl.cam.ac.uk/~mgk25/> — 2002-07-25
-
-
-The ASCII compatible UTF-8 encoding used in this plain-text file
-is defined in Unicode, ISO 10646-1, and RFC 2279.
-
-
-Using Unicode/UTF-8, you can write in emails and source code things such as
-
-Mathematics and sciences:
-
-  ∮ E⋅da = Q,  n → ∞, ∑ f(i) = ∏ g(i),      ⎧⎡⎛┌─────┐⎞⎤⎫
-                                            ⎪⎢⎜│a²+b³ ⎟⎥⎪
-  ∀x∈ℝ: ⌈x⌉ = −⌊−x⌋, α ∧ ¬β = ¬(¬α ∨ β),    ⎪⎢⎜│───── ⎟⎥⎪
-                                            ⎪⎢⎜⎷ c₈   ⎟⎥⎪
-  ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ,                   ⎨⎢⎜       ⎟⎥⎬
-                                            ⎪⎢⎜ ∞     ⎟⎥⎪
-  ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (⟦A⟧ ⇔ ⟪B⟫),      ⎪⎢⎜ ⎲     ⎟⎥⎪
-                                            ⎪⎢⎜ ⎳aⁱ-bⁱ⎟⎥⎪
-  2H₂ + O₂ ⇌ 2H₂O, R = 4.7 kΩ, ⌀ 200 mm     ⎩⎣⎝i=1    ⎠⎦⎭
-
-Linguistics and dictionaries:
-
-  ði ıntəˈnæʃənəl fəˈnɛtık əsoʊsiˈeıʃn
-  Y [ˈʏpsilɔn], Yen [jɛn], Yoga [ˈjoːgɑ]
-
-APL:
-
-  ((V⍳V)=⍳⍴V)/V←,V    ⌷←⍳→⍴∆∇⊃‾⍎⍕⌈
-
-Nicer typography in plain text files:
-
-  ╔══════════════════════════════════════════╗
-  ║                                          ║
-  ║   • ‘single’ and “double” quotes         ║
-  ║                                          ║
-  ║   • Curly apostrophes: “We’ve been here” ║
-  ║                                          ║
-  ║   • Latin-1 apostrophe and accents: '´`  ║
-  ║                                          ║
-  ║   • ‚deutsche‘ „Anführungszeichen“       ║
-  ║                                          ║
-  ║   • †, ‡, ‰, •, 3–4, —, −5/+5, ™, …      ║
-  ║                                          ║
-  ║   • ASCII safety test: 1lI|, 0OD, 8B     ║
-  ║                      ╭─────────╮         ║
-  ║   • the euro symbol: │ 14.95 € │         ║
-  ║                      ╰─────────╯         ║
-  ╚══════════════════════════════════════════╝
-
-Combining characters:
-
-  STARGΛ̊TE SG-1, a = v̇ = r̈, a⃑ ⊥ b⃑
-
-Greek (in Polytonic):
-
-  The Greek anthem:
-
-  Σὲ γνωρίζω ἀπὸ τὴν κόψη
-  τοῦ σπαθιοῦ τὴν τρομερή,
-  σὲ γνωρίζω ἀπὸ τὴν ὄψη
-  ποὺ μὲ βία μετράει τὴ γῆ.
-
-  ᾿Απ᾿ τὰ κόκκαλα βγαλμένη
-  τῶν ῾Ελλήνων τὰ ἱερά
-  καὶ σὰν πρῶτα ἀνδρειωμένη
-  χαῖρε, ὦ χαῖρε, ᾿Ελευθεριά!
-
-  From a speech of Demosthenes in the 4th century BC:
-
-  Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν, ὦ ἄνδρες ᾿Αθηναῖοι,
-  ὅταν τ᾿ εἰς τὰ πράγματα ἀποβλέψω καὶ ὅταν πρὸς τοὺς
-  λόγους οὓς ἀκούω· τοὺς μὲν γὰρ λόγους περὶ τοῦ
-  τιμωρήσασθαι Φίλιππον ὁρῶ γιγνομένους, τὰ δὲ πράγματ᾿
-  εἰς τοῦτο προήκοντα,  ὥσθ᾿ ὅπως μὴ πεισόμεθ᾿ αὐτοὶ
-  πρότερον κακῶς σκέψασθαι δέον. οὐδέν οὖν ἄλλο μοι δοκοῦσιν
-  οἱ τὰ τοιαῦτα λέγοντες ἢ τὴν ὑπόθεσιν, περὶ ἧς βουλεύεσθαι,
-  οὐχὶ τὴν οὖσαν παριστάντες ὑμῖν ἁμαρτάνειν. ἐγὼ δέ, ὅτι μέν
-  ποτ᾿ ἐξῆν τῇ πόλει καὶ τὰ αὑτῆς ἔχειν ἀσφαλῶς καὶ Φίλιππον
-  τιμωρήσασθαι, καὶ μάλ᾿ ἀκριβῶς οἶδα· ἐπ᾿ ἐμοῦ γάρ, οὐ πάλαι
-  γέγονεν ταῦτ᾿ ἀμφότερα· νῦν μέντοι πέπεισμαι τοῦθ᾿ ἱκανὸν
-  προλαβεῖν ἡμῖν εἶναι τὴν πρώτην, ὅπως τοὺς συμμάχους
-  σώσομεν. ἐὰν γὰρ τοῦτο βεβαίως ὑπάρξῃ, τότε καὶ περὶ τοῦ
-  τίνα τιμωρήσεταί τις καὶ ὃν τρόπον ἐξέσται σκοπεῖν· πρὶν δὲ
-  τὴν ἀρχὴν ὀρθῶς ὑποθέσθαι, μάταιον ἡγοῦμαι περὶ τῆς
-  τελευτῆς ὁντινοῦν ποιεῖσθαι λόγον.
-
-  Δημοσθένους, Γ´ ᾿Ολυνθιακὸς
-
-Georgian:
-
-  From a Unicode conference invitation:
-
-  გთხოვთ ახლავე გაიაროთ რეგისტრაცია Unicode-ის მეათე საერთაშორისო
-  კონფერენციაზე დასასწრებად, რომელიც გაიმართება 10-12 მარტს,
-  ქ. მაინცში, გერმანიაში. კონფერენცია შეჰკრებს ერთად მსოფლიოს
-  ექსპერტებს ისეთ დარგებში როგორიცაა ინტერნეტი და Unicode-ი,
-  ინტერნაციონალიზაცია და ლოკალიზაცია, Unicode-ის გამოყენება
-  ოპერაციულ სისტემებსა, და გამოყენებით პროგრამებში, შრიფტებში,
-  ტექსტების დამუშავებასა და მრავალენოვან კომპიუტერულ სისტემებში.
-
-Russian:
-
-  From a Unicode conference invitation:
-
-  Зарегистрируйтесь сейчас на Десятую Международную Конференцию по
-  Unicode, которая состоится 10-12 марта 1997 года в Майнце в Германии.
-  Конференция соберет широкий круг экспертов по  вопросам глобального
-  Интернета и Unicode, локализации и интернационализации, воплощению и
-  применению Unicode в различных операционных системах и программных
-  приложениях, шрифтах, верстке и многоязычных компьютерных системах.
-
-Thai (UCS Level 2):
-
-  Excerpt from a poetry on The Romance of The Three Kingdoms (a Chinese
-  classic 'San Gua'):
-
-  [----------------------------|------------------------]
-    ๏ แผ่นดินฮั่นเสื่อมโทรมแสนสังเวช  พระปกเกศกองบู๊กู้ขึ้นใหม่
-  สิบสองกษัตริย์ก่อนหน้าแลถัดไป       สององค์ไซร้โง่เขลาเบาปัญญา
-    ทรงนับถือขันทีเป็นที่พึ่ง           บ้านเมืองจึงวิปริตเป็นนักหนา
-  โฮจิ๋นเรียกทัพทั่วหัวเมืองมา         หมายจะฆ่ามดชั่วตัวสำคัญ
-    เหมือนขับไสไล่เสือจากเคหา      รับหมาป่าเข้ามาเลยอาสัญ
-  ฝ่ายอ้องอุ้นยุแยกให้แตกกัน          ใช้สาวนั้นเป็นชนวนชื่นชวนใจ
-    พลันลิฉุยกุยกีกลับก่อเหตุ          ช่างอาเพศจริงหนาฟ้าร้องไห้
-  ต้องรบราฆ่าฟันจนบรรลัย           ฤๅหาใครค้ำชูกู้บรรลังก์ ฯ
-
-  (The above is a two-column text. If combining characters are handled
-  correctly, the lines of the second column should be aligned with the
-  | character above.)
-
-Ethiopian:
-
-  Proverbs in the Amharic language:
-
-  ሰማይ አይታረስ ንጉሥ አይከሰስ።
-  ብላ ካለኝ እንደአባቴ በቆመጠኝ።
-  ጌጥ ያለቤቱ ቁምጥና ነው።
-  ደሀ በሕልሙ ቅቤ ባይጠጣ ንጣት በገደለው።
-  የአፍ ወለምታ በቅቤ አይታሽም።
-  አይጥ በበላ ዳዋ ተመታ።
-  ሲተረጉሙ ይደረግሙ።
-  ቀስ በቀስ፥ ዕንቁላል በእግሩ ይሄዳል።
-  ድር ቢያብር አንበሳ ያስር።
-  ሰው እንደቤቱ እንጅ እንደ ጉረቤቱ አይተዳደርም።
-  እግዜር የከፈተውን ጉሮሮ ሳይዘጋው አይድርም።
-  የጎረቤት ሌባ፥ ቢያዩት ይስቅ ባያዩት ያጠልቅ።
-  ሥራ ከመፍታት ልጄን ላፋታት።
-  ዓባይ ማደሪያ የለው፥ ግንድ ይዞ ይዞራል።
-  የእስላም አገሩ መካ የአሞራ አገሩ ዋርካ።
-  ተንጋሎ ቢተፉ ተመልሶ ባፉ።
-  ወዳጅህ ማር ቢሆን ጨርስህ አትላሰው።
-  እግርህን በፍራሽህ ልክ ዘርጋ።
-
-Runes:
-
-  ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ
-
-  (Old English, which transcribed into Latin reads 'He cwaeth that he
-  bude thaem lande northweardum with tha Westsae.' and means 'He said
-  that he lived in the northern land near the Western Sea.')
-
-Braille:
-
-  ⡌⠁⠧⠑ ⠼⠁⠒  ⡍⠜⠇⠑⠹⠰⠎ ⡣⠕⠌
-
-  ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠙⠑⠁⠙⠒ ⠞⠕ ⠃⠑⠛⠔ ⠺⠊⠹⠲ ⡹⠻⠑ ⠊⠎ ⠝⠕ ⠙⠳⠃⠞
-  ⠱⠁⠞⠑⠧⠻ ⠁⠃⠳⠞ ⠹⠁⠞⠲ ⡹⠑ ⠗⠑⠛⠊⠌⠻ ⠕⠋ ⠙⠊⠎ ⠃⠥⠗⠊⠁⠇ ⠺⠁⠎
-  ⠎⠊⠛⠝⠫ ⠃⠹ ⠹⠑ ⠊⠇⠻⠛⠹⠍⠁⠝⠂ ⠹⠑ ⠊⠇⠻⠅⠂ ⠹⠑ ⠥⠝⠙⠻⠞⠁⠅⠻⠂
-  ⠁⠝⠙ ⠹⠑ ⠡⠊⠑⠋ ⠍⠳⠗⠝⠻⠲ ⡎⠊⠗⠕⠕⠛⠑ ⠎⠊⠛⠝⠫ ⠊⠞⠲ ⡁⠝⠙
-  ⡎⠊⠗⠕⠕⠛⠑⠰⠎ ⠝⠁⠍⠑ ⠺⠁⠎ ⠛⠕⠕⠙ ⠥⠏⠕⠝ ⠰⡡⠁⠝⠛⠑⠂ ⠋⠕⠗ ⠁⠝⠹⠹⠔⠛ ⠙⠑
-  ⠡⠕⠎⠑ ⠞⠕ ⠏⠥⠞ ⠙⠊⠎ ⠙⠁⠝⠙ ⠞⠕⠲
-
-  ⡕⠇⠙ ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
-
-  ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞ ⠍⠑⠁⠝ ⠞⠕ ⠎⠁⠹ ⠹⠁⠞ ⡊ ⠅⠝⠪⠂ ⠕⠋ ⠍⠹
-  ⠪⠝ ⠅⠝⠪⠇⠫⠛⠑⠂ ⠱⠁⠞ ⠹⠻⠑ ⠊⠎ ⠏⠜⠞⠊⠊⠥⠇⠜⠇⠹ ⠙⠑⠁⠙ ⠁⠃⠳⠞
-  ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲ ⡊ ⠍⠊⠣⠞ ⠙⠁⠧⠑ ⠃⠑⠲ ⠔⠊⠇⠔⠫⠂ ⠍⠹⠎⠑⠇⠋⠂ ⠞⠕
-  ⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝⠁⠊⠇ ⠁⠎ ⠹⠑ ⠙⠑⠁⠙⠑⠌ ⠏⠊⠑⠊⠑ ⠕⠋ ⠊⠗⠕⠝⠍⠕⠝⠛⠻⠹
-  ⠔ ⠹⠑ ⠞⠗⠁⠙⠑⠲ ⡃⠥⠞ ⠹⠑ ⠺⠊⠎⠙⠕⠍ ⠕⠋ ⠳⠗ ⠁⠝⠊⠑⠌⠕⠗⠎
-  ⠊⠎ ⠔ ⠹⠑ ⠎⠊⠍⠊⠇⠑⠆ ⠁⠝⠙ ⠍⠹ ⠥⠝⠙⠁⠇⠇⠪⠫ ⠙⠁⠝⠙⠎
-  ⠩⠁⠇⠇ ⠝⠕⠞ ⠙⠊⠌⠥⠗⠃ ⠊⠞⠂ ⠕⠗ ⠹⠑ ⡊⠳⠝⠞⠗⠹⠰⠎ ⠙⠕⠝⠑ ⠋⠕⠗⠲ ⡹⠳
-  ⠺⠊⠇⠇ ⠹⠻⠑⠋⠕⠗⠑ ⠏⠻⠍⠊⠞ ⠍⠑ ⠞⠕ ⠗⠑⠏⠑⠁⠞⠂ ⠑⠍⠏⠙⠁⠞⠊⠊⠁⠇⠇⠹⠂ ⠹⠁⠞
-  ⡍⠜⠇⠑⠹ ⠺⠁⠎ ⠁⠎ ⠙⠑⠁⠙ ⠁⠎ ⠁ ⠙⠕⠕⠗⠤⠝⠁⠊⠇⠲
-
-  (The first couple of paragraphs of "A Christmas Carol" by Dickens)
-
-Compact font selection example text:
-
-  ABCDEFGHIJKLMNOPQRSTUVWXYZ /0123456789
-  abcdefghijklmnopqrstuvwxyz £©µÀÆÖÞßéöÿ
-  –—‘“”„†•…‰™œŠŸž€ ΑΒΓΔΩαβγδω АБВГДабвгд
-  ∀∂∈ℝ∧∪≡∞ ↑↗↨↻⇣ ┐┼╔╘░►☺♀ ﬁ<>⑀₂ἠḂӥẄɐː⍎אԱა
-
-Greetings in various languages:
-
-  Hello world, Καλημέρα κόσμε, コンニチハ
-
-Box drawing alignment tests:                                          █
-                                                                      ▉
-  ╔══╦══╗  ┌──┬──┐  ╭──┬──╮  ╭──┬──╮  ┏━━┳━━┓  ┎┒┏┑   ╷  ╻ ┏┯┓ ┌┰┐    ▊ ╱╲╱╲╳╳╳
-  ║┌─╨─┐║  │╔═╧═╗│  │╒═╪═╕│  │╓─╁─╖│  ┃┌─╂─┐┃  ┗╃╄┙  ╶┼╴╺╋╸┠┼┨ ┝╋┥    ▋ ╲╱╲╱╳╳╳
-  ║│╲ ╱│║  │║   ║│  ││ │ ││  │║ ┃ ║│  ┃│ ╿ │┃  ┍╅╆┓   ╵  ╹ ┗┷┛ └┸┘    ▌ ╱╲╱╲╳╳╳
-  ╠╡ ╳ ╞╣  ├╢   ╟┤  ├┼─┼─┼┤  ├╫─╂─╫┤  ┣┿╾┼╼┿┫  ┕┛┖┚     ┌┄┄┐ ╎ ┏┅┅┓ ┋ ▍ ╲╱╲╱╳╳╳
-  ║│╱ ╲│║  │║   ║│  ││ │ ││  │║ ┃ ║│  ┃│ ╽ │┃  ░░▒▒▓▓██ ┊  ┆ ╎ ╏  ┇ ┋ ▎
-  ║└─╥─┘║  │╚═╤═╝│  │╘═╪═╛│  │╙─╀─╜│  ┃└─╂─┘┃  ░░▒▒▓▓██ ┊  ┆ ╎ ╏  ┇ ┋ ▏
-  ╚══╩══╝  └──┴──┘  ╰──┴──╯  ╰──┴──╯  ┗━━┻━━┛  ▗▄▖▛▀▜   └╌╌┘ ╎ ┗╍╍┛ ┋  ▁▂▃▄▅▆▇█
-                                               ▝▀▘▙▄▟
--- a/v1_0/test_data/utf8samples/Unicode_transcriptions.html
+++ b/v1_0/test_data/utf8samples/Unicode_transcriptions.html
@ -1,167 +0,0 @@
-? 	*Unicode Transcriptions* 	Notes <#Notes>
-
-Glyphs <http://www.macchiato.com/unicode/show.html> | Samples
-<http://www.macchiato.com/unicode/Unicode_transcriptions.html> | Charts
-<http://www.macchiato.com/unicode/charts.html> | UTF
-<http://www.macchiato.com/unicode/convert.html> | Forms
-<http://www-4.ibm.com/software/developer/library/utfencodingforms/> |
-Home <http://www.macchiato.com>.
-<http://member.linkexchange.com/cgi-bin/fc/fastcounter-login?750641>
-
-Name 	Text 	Image
-Arabic (Arabic) 	يونِكود 	?
-Arabic (Persian) 	یونی‌کُد 	/ ?/
-Armenian 	Յունիկօդ 	
-Bengali 	য়ূনিকোড 	
-Bopomofo 	ㄊㄨㄥ˅ ㄧˋ ㄇㄚ˅ 	
-ㄨㄢˋ ㄍㄨㄛˊ ㄇㄚ˅ 	
-Braille 	  	 
-Buhid 	  	 
-Canadian Aboriginal 	ᔫᗂᑰᑦ 	
-Cherokee 	ᏳᏂᎪᏛ 	
-Cypriot 	  	 
-Cyrillic (Russian) 	Юникод 	?
-Deseret (English) 	??????? 	
-Devanagari (Hindi) 	यूनिकोड 	?
-Ethiopic 	ዩኒኮድ 	
-Georgian 	უნიკოდი 	?
-Gothic 	  	 
-Greek 	Γιούνικοντ 	
-Gujarati 	યૂનિકોડ 	
-Gurmukhi 	ਯੂਨਿਕੋਡ 	
-Han (Chinese) 	统一码 	?
-統一碼 	?
-万国码 	?
-萬國碼 	?
-Hangul 	유니코드 	
-Hanunoo 	  	 
-Hebrew 	יוניקוד 	
-Hebrew (pointed) 	יוּנִיקוׁד 	
-Hebrew (Yiddish) 	יוניקאָד 	?
-Hiragana (Japanese) 	ゆにこおど 	 
-Katakana (Japanese) 	ユニコード 	?
-Kannada 	ಯೂನಿಕೋಡ್ 	
-Khmer 	យូនីគោដ 	
-Lao 	  	 
-Latin 	Unicode 	Unicode
-Latin (IPA <#English_Pronunciation>) 	ˈjunɪˌkoːd 	?
-Latin (Am. Dict. <#American_Dictionary>) 	Ūnĭcōde̽ 	?
-Limbu 	  	 
-Linear B 	  	 
-Malayalam 	യൂനികോഡ് 	
-Mongolian 	  	
-Myanmar 	  	
-Ogham 	ᚔᚒᚅᚔᚉᚑᚇ 	/ /
-Old Italic 	  	 
-Oriya 	ୟୂନିକୋଡ 	
-Osmanya 	  	 
-Runic (Anglo-Saxon) 	ᛡᚢᚾᛁᚳᚩᛞ 	
-Shavian 	  	 
-Sinhala 	යණනිකෞද් 	
-Syriac 	ܝܘܢܝܩܘܕ 	
-Tagbanwa 	  	 
-Tagalog 	  	 
-Tai Le 	  	 
-Tamil 	யூனிகோட் 	
-Telugu 	యూనికోడ్ 	
-Thaana 	  	
-Thai 	ยูนืโคด 	
-Tibetan (Dzongkha) 	ཨུ་ནི་ཀོཌྲ། 	
-Ugaritic 	  	 
-Yi 	  	
-
-
-      Notes:
-
-There are different ways to transcribe the word “Unicode”, depending on
-the language and script. In some cases there is only one language that
-customarily uses a given script; in others there are many languages. The
-goal here is at a minimum to collect at least one transcription for each
-script in a language customarily written in that script, with more
-languages if possible. If the transcription is the same for multiple
-languages in a script, then a single representative language is used.
-
-Still missing are transcriptions for the items above in RED (in at least
-one language). I would appreciate any other transcriptions, or
-corrections for the ones listed here. Send to mark3@macchiato.com
-<mailto:mark3@macchiato.com>, using the directions below:
-
-    * *Supplying Missing Items*
-          o Most Latin-script languages will follow the spelling, and
-            change the pronunciation. For any that would not, it would
-            be good to have the alternate spelling.
-          o For non-Latin scripts the goal is to match the English
-            pronunciation — /*not*/ spelling. Above is the IPA <#IPA>
-            (in phonemic transcription) that should be matched as
-            closely as possible (without sounding affected in the target
-            language)
-          o Text would be best in either the UTF-8 text, or the code
-            points in hex HTML. E.g. either of the following:
-                + "Юникод"
-                + "&#x042E;&#x043D;&#x0438;&#x043A;&#x043E;&#x0434;"
-                + Note: for / supplementary characters/
-                  <http://www.unicode.org/glossary/#supplementary_character>,
-                  there should be one hex number per code point, not two
-                  surrogates
-                  <http://www.unicode.org/glossary/#surrogate_code_point>:
-                      # &#x10000; /*not*/ &#xD800;&xDC00;
-          o If you have a good font, I'd also appreciate a GIF. It
-            should be *96 x 24* bits, with the text centered, in black
-            on white (plus grays if smoothed).
-    * *Other Comments*
-          o Because some browsers won't handle the text, both text and
-            GIF image are supplied. If you can’t read the text columns,
-            see Display Problems
-            <http://www.unicode.org/help/display_problems.html>.
-          o The Chinese versions (inc. Bopomofo) are translations, not
-            transcriptions, since "transcription in Chinese is pretty
-            lame" [J. Becker].
-          o There are other "translations" of Unicode that may be in
-            use, such as the Vietnamese "Thống Nhất Mã".
-          o For sample pages in different languages on the Unicode site,
-            see What is Unicode?
-            <http://www.unicode.org/unicode/standard/WhatIsUnicode.html>
-          o Americans are not generally used to IPA, and find a variety
-            of different systems in their dictionaries. This one leaves
-            the base letters as they are, and uses diacritics for
-            pronunciation.
-    * *Etymology of /Unicode/*
-          o Coined by J. Becker. Not related to previous usages, such as:
-                + A telegraphic code in which one word or set of letters
-                  represents a sentence or phrase; a telegram or message
-                  in this. (late 19th century, OED)
-          o According to my references, the prefix "uni" is directly
-            from Latin while the word "code" is through French.
-          o The original Indo-European apparently would have been
-            *oino-kau-do ("one strike give"): *kau apparently being
-            related to such English words as: hew, haggle, hoe, hag,
-            hay, hack, caudad, caudal, caudate, caudex, coda, codex,
-            codicil, coward, incus, and Kovač (personal name: "smith").
-                + I will leave the exact derivations to the exegetes,
-                  but I like the association with "haggle" myself.
-    * *Contributions*
-          o This draws on contributions or comments from:
-                + Dixon Au
-                + Joe Becker
-                + Maurice Bauhahn
-                + Abel Cheung
-                + Peter Constable
-                + Michael Everson
-                + Christopher John Fynn
-                + Michael Kaplan
-                + George Kiraz
-                + Abdul Malik
-                + Siva Nataraja
-                + Roozbeh Pournader
-                + Jonathan Rosenne
-                + Jungshik Shin
-
------------------------------------------------------------------------
-	
-
-Terms of Use <http://www.macchiato.com/terms_of_use.html>. Last updated:
-MED - 04/20/2003 15:30:33.
-<http://member.linkexchange.com/cgi-bin/fc/fastcounter-login?750641>
-
- 
-
--- a/v1_0/test_data/utf8samples/big.txt
+++ b/v1_0/test_data/utf8samples/big.txt
--- a/v1_0/test_data/utf8samples/quickbrown.txt
+++ b/v1_0/test_data/utf8samples/quickbrown.txt
@ -1,126 +0,0 @@
-Sentences that contain all letters commonly used in a language
--------------------------------------------------------------
-
-Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2001-09-02
-
-This file is UTF-8 encoded.
-
-
-Danish (da)
---------
-
-  Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen
-  Wolther spillede på xylofon.
-  (= Quiz contestants were eating strawbery with cream while Wolther
-  the circus clown played on xylophone.)
-
-German (de)
-----------
-
-  Falsches Üben von Xylophonmusik quält jeden größeren Zwerg
-  (= Wrongful practicing of xylophone music tortures every larger dwarf)
-
-  Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich
-  (= Twelve boxing fighters hunted Eva across the dike of Sylt)
-
-  Heizölrückstoßabdämpfung
-  (= fuel oil recoil absorber)
-  (jqvwxy missing, but all non-ASCII letters in one word)
-
-English (en)
------------
-
-  The quick brown fox jumps over the lazy dog
-
-Spanish (es)
------------
-
-  El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y 
-  frío, añoraba a su querido cachorro.
-  (Contains every letter and every accent, but not every combination
-  of vowel + acute.)
-
-French (fr)
-----------
-
-  Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à
-  côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce
-  qui lui permet de penser à la cænogenèse de l'être dont il est question
-  dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui,
-  pense-t-il, diminue çà et là la qualité de son œuvre. 
-
-  l'île exiguë
-  Où l'obèse jury mûr
-  Fête l'haï volapük,
-  Âne ex aéquo au whist,
-  Ôtez ce vœu déçu.
-
-  Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en
-  canoë au delà des îles, près du mälström où brûlent les novæ.
-
-Irish Gaelic (ga)
-----------------
-
-  D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh
-
-Hungarian (hu)
--------------
-
-  Árvíztűrő tükörfúrógép
-  (= flood-proof mirror-drilling machine, only all non-ASCII letters)
-
-Icelandic (is)
--------------
-
-  Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa
-
-  Sævör grét áðan því úlpan var ónýt
-  (some ASCII letters missing)
-
-Japanese (jp)
-------------
-
-  Hiragana: (Iroha)
-
-  いろはにほへとちりぬるを
-  わかよたれそつねならむ
-  うゐのおくやまけふこえて
-  あさきゆめみしゑひもせす
-
-  Katakana:
-
-  イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
-  ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン
-
-Hebrew (iw)
-----------
-
-  ? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה
-
-Polish (pl)
-----------
-
-  Pchnąć w tę łódź jeża lub ośm skrzyń fig
-  (= To push a hedgehog or eight bins of figs in this boat)
-
-Russian (ru)
------------
-
-  В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!
-  (= Would a citrus live in the bushes of south? Yes, but only a fake one!)
-
-Thai (th)
---------
-
-  [--------------------------|------------------------]
-  ๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า  กว่าบรรดาฝูงสัตว์เดรัจฉาน
-  จงฝ่าฟันพัฒนาวิชาการ           อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร
-  ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า     หัดอภัยเหมือนกีฬาอัชฌาสัย
-  ปฏิบัติประพฤติกฎกำหนดใจ        พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ
-
-  [The copyright for the Thai example is owned by The Computer
-  Association of Thailand under the Royal Patronage of His Majesty the
-  King.]
-
-Please let me know if you find others! Special thanks to the people
-from all over the world who contributed these sentences.
--- a/v1_0/test_drivers/Makefile
+++ b/v1_0/test_drivers/Makefile
@ -1,19 +0,0 @@
-# Top-level test Makefile: every test program is built by delegating to the
-# Makefile in its own directory.
-CC = g++
-CFLAGS = -g
-
-# None of these targets creates a file of the same name in this directory.
-# Declaring them phony keeps a stray file called e.g. "clean" or "all" from
-# silently disabling the rule.
-.PHONY: all smoketest regressiontest negativetest utf8readertest clean
-
-all: smoketest regressiontest negativetest utf8readertest
-
-smoketest:
-	cd smoke_test &&  $(MAKE) $@
-
-regressiontest:
-	cd regression_tests &&  $(MAKE) $@
-
-negativetest:
-	cd negative &&  $(MAKE) $@
-
-utf8readertest:
-	cd utf8reader &&  $(MAKE) $@
-
-# -f: do not fail when some of the binaries have not been built yet.
-clean:
-	rm -f smoke_test/smoketest regression_tests/regressiontest negative/negative utf8reader/utf8reader
--- a/v1_0/test_drivers/negative/Makefile
+++ b/v1_0/test_drivers/negative/Makefile
@ -1,5 +0,0 @@
-CC = g++
-CFLAGS = -g -Wall -pedantic
-
-# "negativetest" is the target name requested when building, but the compiler
-# writes the program to "negative". With the recipe attached to "negativetest"
-# the target file never exists and the program is recompiled on every run.
-# Keep the requested name as a phony alias and attach the recipe to the file
-# that is actually produced.
-.PHONY: negativetest
-negativetest: negative
-
-negative: negative.cpp ../../source/utf8.h
-	$(CC) $(CFLAGS) negative.cpp -o negative
--- a/v1_0/test_drivers/negative/negative.cpp
+++ b/v1_0/test_drivers/negative/negative.cpp
@ -1,39 +0,0 @@
-#include "../../source/utf8.h"
-using namespace utf8;
-
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <algorithm>
-using namespace std;
-
-const char* TEST_FILE_PATH = "../../test_data/negative/utf8_invalid.txt";
-// 1-based numbers of the lines in TEST_FILE_PATH that are expected to be
-// rejected by is_valid.
-const unsigned INVALID_LINES[] = { 75, 76, 82, 83, 84, 85, 93, 102, 103, 105, 106, 107, 108, 109, 110, 114, 115, 116, 117, 124, 125, 130, 135, 140, 145, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 169, 175, 176, 177, 207, 208, 209, 210, 211, 220, 221, 222, 223, 224, 232, 233, 234, 235, 236, 247, 248, 249, 250, 251, 252, 253, 257, 258, 259, 260, 261, 262, 263, 264, 268, 269};
-const unsigned* INVALID_LINES_END = INVALID_LINES + sizeof(INVALID_LINES)/sizeof(INVALID_LINES[0]);
-
-// Validates the test file line by line and prints a message for every line
-// that is invalid UTF-8 without being listed in INVALID_LINES.
-// Returns 0 when the file was read and no unexpected invalid line was found,
-// 1 otherwise.
-int main()
-{
-    // Open the test file
-    ifstream fs8(TEST_FILE_PATH);
-    if (!fs8.is_open()) {
-        cout << "Could not open " << TEST_FILE_PATH << endl;
-        return 1;
-    }
-
-    // Read it line by line. getline() ends the loop on any stream failure;
-    // testing eof() alone would spin forever if the stream went bad without
-    // reaching the end of the file.
-    unsigned int line_count = 0;
-    bool only_expected_failures = true;
-    string line;
-    while (getline(fs8, line)) {
-        ++line_count;
-        if (is_valid(line.begin(), line.end()))
-            continue;
-
-        // Report invalid UTF-8 only on lines that are not known to contain it
-        if (find(INVALID_LINES, INVALID_LINES_END, line_count) == INVALID_LINES_END) {
-            cout << "Unexpected invalid utf-8 at line " << line_count << '\n';
-            only_expected_failures = false;
-        }
-    }
-    return only_expected_failures ? 0 : 1;
-}
--- a/v1_0/test_drivers/performance/Makefile
+++ b/v1_0/test_drivers/performance/Makefile
@ -1,5 +0,0 @@
-CC = g++
-CFLAGS = -O2
-
-# Optimised build of the iconv comparison benchmark; rebuilt when its source,
-# the library header or the timer helper changes.
-iconvtest: iconvtest.cpp ../../source/utf8.h timer.h
-	$(CC) $(CFLAGS) $< -o $@
--- a/v1_0/test_drivers/performance/iconvtest.cpp
+++ b/v1_0/test_drivers/performance/iconvtest.cpp
@ -1,125 +0,0 @@
-#include <iconv.h>
-#include "../../source/utf8.h"
-#include "timer.h"
-#include <cstring>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <algorithm>
-#include <vector>
-using namespace std;
-
-using namespace utf8;
-
-// Benchmark driver. Reads the UTF-8 file named on the command line, converts
-// it to UTF-16 and back with utf8-cpp (checked and unchecked variants) and
-// with iconv, and prints the time spent in each conversion.
-// Always returns 0; problems are reported on standard output.
-int main(int argc, char** argv)
-{
-    if (argc != 2) {
-        cout << "\nUsage: iconvtest filename\n";
-        return 0;
-    }
-    const char* test_file_path = argv[1];
-    // Open the test file (UTF-8 encoded text)
-    ifstream fs8(test_file_path, ios::binary);
-    if (!fs8.is_open()) {
-        cout << "Could not open " << test_file_path << endl;
-        return 0;
-    }
-    // get length
-    fs8.seekg(0, ios::end);
-    const streamoff file_size = fs8.tellg();
-    fs8.seekg(0, ios::beg);
-    if (file_size < 0) {
-        // tellg() signals failure with -1, which must not reach new[] below
-        cout << "Could not determine the size of " << test_file_path << endl;
-        return 0;
-    }
-    const int length = static_cast<int>(file_size);
-
-    // allocate the buffer (no vector - we are benchmarking conversions, not STL)
-    char* buf = new char[length];
-    char* end_buf = buf + length;
-    // fill the data
-    fs8.read(buf, length);
-    fs8.close();
-
-    // Dry run into a growing vector just to learn how many UTF-16 code units
-    // the text needs. The element type is 16 bits wide so that the units are
-    // stored without truncation.
-    vector<unsigned short> temputf16;
-    utf8::utf8to16(buf, end_buf, back_inserter(temputf16));
-    const vector<unsigned short>::size_type wlength = temputf16.size();
-    unsigned short* utf16buf = new unsigned short[wlength];
-
-    cout << "UTF8 to UTF-16\n";
-    {
-        memset (utf16buf, 0 , wlength * sizeof(unsigned short));
-        // utf-8 cpp:
-        cout << "utf8::utf8to16: ";
-        timer t(cout);
-        utf8::utf8to16(buf, buf + length, utf16buf);
-    }
-
-    {
-        memset (utf16buf, 0 , wlength * sizeof(unsigned short));
-        // utf-8 cpp:
-        cout << "unchecked::utf8to16: ";
-        timer t(cout);
-        utf8::unchecked::utf8to16(buf, buf + length, utf16buf);
-    }
-
-    // Separate output buffer for iconv, sized like the utf8-cpp one so the
-    // two results can be compared afterwards.
-    unsigned short* utf16iconvbuf = new unsigned short[wlength];
-    {
-        memset (utf16iconvbuf, 0 , wlength * sizeof(unsigned short));
-        // iconv
-        cout << "iconv: ";
-
-        iconv_t cd = iconv_open("UTF-16LE", "UTF-8");
-        if (cd == iconv_t(-1)) {
-            cout << "Error openning the iconv stream";
-            return 0;
-        }
-        char* inbuf = buf;
-        size_t in_bytes_left = length;
-        char* outbuf = (char*)utf16iconvbuf;
-        size_t out_bytes_left = wlength * sizeof (unsigned short);
-        size_t iconv_result;
-        {
-            // Only the conversion call itself is inside the timed scope
-            timer t(cout);
-            iconv_result = iconv(cd, &inbuf, &in_bytes_left, &outbuf, &out_bytes_left);
-        }
-        if (iconv_result == size_t(-1))
-            cout << "iconv conversion failed\n";
-        iconv_close(cd);
-    }
-
-    // just check the correctness while we are here:
-    if (!equal(utf16buf, utf16buf + wlength, utf16iconvbuf))
-        cout << "Different result!!!\n";
-
-    // the other way around
-    cout << "UTF16 to UTF-8\n";
-    {
-        //iconv
-        memset(buf, 0, length);
-        cout<< "iconv: ";
-
-        iconv_t cd = iconv_open("UTF-8", "UTF-16LE");
-        if (cd == iconv_t(-1)) {
-            cout << "Error openning the iconv stream";
-            return 0;
-        }
-        char* inbuf = (char*)utf16buf;
-        size_t in_bytes_left = wlength * sizeof(unsigned short);
-        char* outbuf =buf;
-        size_t out_bytes_left = length;
-        size_t iconv_result;
-        {
-            timer t(cout);
-            iconv_result = iconv(cd, &inbuf, &in_bytes_left, &outbuf, &out_bytes_left);
-        }
-        if (iconv_result == size_t(-1))
-            cout << "iconv conversion failed\n";
-        iconv_close(cd);
-    }
-
-    {
-        memset (buf, 0 , length);
-        // utf-8 cpp:
-        cout << "unchecked::utf16to8: ";
-        timer t(cout);
-        utf8::unchecked::utf16to8(utf16buf, utf16buf + wlength, buf);
-    }
-
-    {
-        memset (buf, 0 , length);
-        cout << "utf16to8: ";
-        timer t(cout);
-        utf8::utf16to8(utf16buf, utf16buf + wlength, buf);
-    }
-
-    delete [] buf;
-    delete [] utf16buf;
-    delete [] utf16iconvbuf;
-}
--- a/v1_0/test_drivers/performance/timer.h
+++ b/v1_0/test_drivers/performance/timer.h
@ -1,21 +0,0 @@
-#ifndef UTF8_TEST_DRIVERS_TIMER_H
-#define UTF8_TEST_DRIVERS_TIMER_H
-
-#include <ctime>
-#include <iostream>
-
-// Scope timer: records std::clock() at construction and, on destruction,
-// writes the elapsed time in milliseconds to the stream it was given.
-struct timer {
-    // report: stream that receives the "Spent ...ms here" line. Only a
-    // reference is kept, so the stream must outlive the timer.
-    timer(std::ostream& report) : report(report)
-       {start = std::clock();}
-    ~timer()
-       {
-          end = std::clock();
-          // Scale in floating point: with a 32-bit clock_t the integer
-          // product (end - start)*1000 overflows once more than about two
-          // million ticks have elapsed.
-          const double elapsed_ms = static_cast<double>(end - start) * 1000.0 / CLOCKS_PER_SEC;
-          const unsigned milliseconds = elapsed_ms > 0.0 ? static_cast<unsigned>(elapsed_ms) : 0u;
-          report << "Spent " << milliseconds << "ms here\n";
-       }
-
-    std::clock_t start;
-    std::clock_t end;
-    std::ostream& report;
-
-private:
-    // Not assignable: the reference member cannot be reseated. Declared but
-    // deliberately left undefined (the previous empty body returned nothing);
-    // the declaration also suppresses a VC++ 8.0 warning.
-    timer& operator = (const timer&);
-};
-
-#endif // UTF8_TEST_DRIVERS_TIMER_H
--- a/v1_0/test_drivers/performance/win32.cpp
+++ b/v1_0/test_drivers/performance/win32.cpp
@ -1,105 +0,0 @@
-#include <windows.h>
-#include "../../source/utf8.h"
-#include "timer.h"
-#include <cstring>
-#include <fstream>
-#include <iostream>
-#include <iterator>
-#include <algorithm>
-#include <vector>
-using namespace std;
-
-using namespace utf8;
-
-// Benchmark driver. Reads the UTF-8 file named on the command line, converts
-// it to UTF-16 and back with utf8-cpp (checked and unchecked variants) and
-// with the Win32 conversion functions, and prints the time spent in each.
-// Always returns 0; problems are reported on standard output.
-int main(int argc, char** argv)
-{
-    if (argc != 2) {
-        cout << "\nUsage: win32test filename\n";
-        return 0;
-    }
-    const char* test_file_path = argv[1];
-    // Open the test file (UTF-8 encoded text)
-    ifstream fs8(test_file_path, ios::binary);
-    if (!fs8.is_open()) {
-        cout << "Could not open " << test_file_path << endl;
-        return 0;
-    }
-    // get length
-    fs8.seekg(0, ios::end);
-    const streamoff file_size = fs8.tellg();
-    fs8.seekg(0, ios::beg);
-    if (file_size < 0) {
-        // tellg() signals failure with -1, which must not reach new[] below
-        cout << "Could not determine the size of " << test_file_path << endl;
-        return 0;
-    }
-    const int length = static_cast<int>(file_size);
-
-    // allocate the buffer (no vector - we are benchmarking conversions, not STL)
-    char* buf = new char[length];
-    // fill the data
-    fs8.read(buf, length);
-    fs8.close();
-    cout << "UTF8 > UTF16\n";
-    // Dry run into a growing vector just to learn how many UTF-16 code units
-    // the text needs.
-    vector<wchar_t> temputf16;
-    utf8::utf8to16(buf, buf + length, back_inserter(temputf16));
-    vector<wchar_t>::size_type wlength = temputf16.size();
-    wchar_t* utf16buf = new wchar_t[wlength];
-
-    {
-        memset (utf16buf, 0 , wlength * sizeof(wchar_t));
-        // utf-8 cpp:
-        cout << "utf8::utf8to16: ";
-        timer t(cout);
-        utf8::utf8to16(buf, buf + length, utf16buf);
-    }
-
-    {
-        memset (utf16buf, 0 , wlength * sizeof(wchar_t));
-        // utf-8 cpp:
-        cout << "unchecked::utf8to16: ";
-        timer t(cout);
-        utf8::unchecked::utf8to16(buf, buf + length, utf16buf);
-    }
-
-    // Separate output buffer for the Win32 conversion, sized like the
-    // utf8-cpp one so the two results can be compared afterwards.
-    wchar_t* utf16iconvbuf = new wchar_t[wlength];
-    {
-        memset (utf16iconvbuf, 0 , wlength * sizeof(wchar_t));
-        // win32
-        cout << "win32: ";
-
-        {
-            // Only the conversion call itself is inside the timed scope
-            timer t(cout);
-            MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buf, length, utf16iconvbuf, int(wlength));
-        }
-
-    }
-
-    // just check the correctness while we are here:
-    if (!equal(utf16buf, utf16buf + wlength, utf16iconvbuf))
-        cout << "Different result!!!";
-
-    // the other way around
-    cout << "UTF16 to UTF-8\n";
-    {
-        //win32
-        memset(buf, 0, length);
-        cout<< "win32: ";
-
-        {
-            timer t(cout);
-            WideCharToMultiByte(CP_UTF8, 0, utf16buf, int(wlength), buf, length, NULL, NULL);
-        }
-    }
-
-    {
-        memset (buf, 0 , length);
-        // utf-8 cpp:
-        cout << "unchecked::utf16to8: ";
-        timer t(cout);
-        utf8::unchecked::utf16to8(utf16buf, utf16buf + wlength, buf);
-    }
-
-    {
-        memset (buf, 0 , length);
-        cout << "utf16to8: ";
-        timer t(cout);
-        utf8::utf16to8(utf16buf, utf16buf + wlength, buf);
-    }
-
-    delete [] buf;
-    delete [] utf16buf;
-    delete [] utf16iconvbuf;
-}
--- a/v1_0/test_drivers/regression_tests/Makefile
+++ b/v1_0/test_drivers/regression_tests/Makefile
@ -1,6 +0,0 @@
-CC = g++
-CFLAGS = -g -Wall -pedantic
-# Headers included by reg_tests_driver.cpp. Every release directory the
-# driver includes has to be listed, otherwise edits to its tests do not
-# trigger a rebuild.
-REG_FILES = r1_0Beta1/*.h r1_0Beta2/*.h r1_0Beta3/*.h
-
-regressiontest: reg_tests_driver.cpp ../../source/utf8.h $(REG_FILES)
-	$(CC) $(CFLAGS) reg_tests_driver.cpp -o regressiontest
--- a/v1_0/test_drivers/regression_tests/r1_0Beta1/basic_functionality.h
+++ b/v1_0/test_drivers/regression_tests/r1_0Beta1/basic_functionality.h
@ -1,16 +0,0 @@
-#include "../../../source/utf8.h"
-using namespace utf8;
-
-// [ 1528544 ] utf::next does not work correctly for 4-byte sequences
-// Decodes the four-byte encoding of U+10FFFF with next() and checks that the
-// expected code point comes back without an exception being thrown.
-void id_1528544()
-{
-    unsigned char encoded[] = {0xf4, 0x8f, 0xbf, 0xbf};
-    unsigned char* cursor = encoded;
-    unsigned char* const stop = encoded + sizeof(encoded);
-
-    bool decoded_correctly = false;
-    try {
-        decoded_correctly = (next (cursor, stop) == 0x10ffff);
-    }
-    catch (std::exception&) {
-        // any exception counts as a failed check
-        decoded_correctly = false;
-    }
-    check (decoded_correctly);
-}
--- a/v1_0/test_drivers/regression_tests/r1_0Beta1/invalidutf8.h
+++ b/v1_0/test_drivers/regression_tests/r1_0Beta1/invalidutf8.h
@ -1,96 +0,0 @@
-#include "../../../source/utf8.h"
-using namespace utf8;
-
-/// [ 1524459 ] utf8::is_valid does not report some illegal code positions
-/// Regression test: every byte sequence below must be rejected by is_valid.
-/// Each array is named after the code point(s) it encodes, e.g. ud800 holds
-/// the three-byte encoding of U+D800 and ud800_dc00 holds U+D800 followed by
-/// U+DC00. The checks are kept as separate statements because the check
-/// macro reports the line number of the failing one.
-void id_1524459()
-{
-// Single UTF-16 surrogates (U+D800..U+DFFF), each encoded on its own:
-unsigned char ud800[] = {0xed, 0xa0, 0x80};
-check (!is_valid(ud800, ud800 + 3));
-
-unsigned char udb7f[] = {0xed, 0xad, 0xbf};
-check (!is_valid(udb7f, udb7f + 3));
-  
-unsigned char udb80[] = {0xed, 0xae, 0x80};
-check (!is_valid(udb80, udb80 + 3));
-  
-unsigned char udbff[] = {0xed, 0xaf, 0xbf};
-check (!is_valid(udbff, udbff + 3));
-
-unsigned char udc00[] = {0xed, 0xb0, 0x80};
-check (!is_valid(udc00, udc00 + 3));
-
-unsigned char udf80[] = {0xed, 0xbe, 0x80};
-check (!is_valid(udf80, udf80 + 3));
-
-unsigned char udfff[] = {0xed, 0xbf, 0xbf};
-check (!is_valid(udfff, udfff + 3));
-
-// Paired UTF-16 surrogates: two three-byte surrogate encodings back to back
-// (six bytes in total):
-unsigned char ud800_dc00[] = {0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80};
-check (!is_valid(ud800_dc00, ud800_dc00 + 6));
-
-unsigned char ud800_dfff[] = {0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf};
-check (!is_valid(ud800_dfff, ud800_dfff + 6));
-
-unsigned char udb7f_dc00[] = {0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80};
-check (!is_valid(udb7f_dc00, udb7f_dc00 + 6));
-
-unsigned char udb7f_dfff[] = {0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf};
-check (!is_valid(udb7f_dfff, udb7f_dfff + 6));
-
-unsigned char udb80_dc00[] = {0xed, 0xae, 0x80, 0xed, 0xb0, 0x80};
-check (!is_valid(udb80_dc00, udb80_dc00 + 6));
-
-unsigned char udb80_dfff[] = {0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf};
-check (!is_valid(udb80_dfff, udb80_dfff + 6));
-
-unsigned char udbff_dc00[] = {0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80};
-check (!is_valid(udbff_dc00, udbff_dc00 + 6));
-
-unsigned char udbff_dfff[] = {0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf};
-check (!is_valid(udbff_dfff, udbff_dfff + 6));
-
-// Other illegal code points: U+FFFE and U+FFFF, which this test expects
-// is_valid to reject.
-unsigned char ufffe[] = {0xef, 0xbf, 0xbe};
-check (!is_valid(ufffe, ufffe + 3));
-
-unsigned char uffff[] = {0xef, 0xbf, 0xbf};
-check (!is_valid(uffff, uffff + 3));
-}
-
-// [ 1525236 ] utf8::is_valid does not detect overlong sequences
-// Each array spells U+002F using more bytes than the shortest encoding needs;
-// is_valid has to reject all three forms.
-void id_1525236 ()
-{
-    unsigned char slash_in_two[] = {0xc0, 0xaf};
-    const bool two_accepted = is_valid(slash_in_two, slash_in_two + sizeof(slash_in_two));
-    check (!two_accepted);
-
-    unsigned char slash_in_three[] = {0xe0, 0x80, 0xaf};
-    const bool three_accepted = is_valid(slash_in_three, slash_in_three + sizeof(slash_in_three));
-    check (!three_accepted);
-
-    unsigned char slash_in_four[] = {0xf0, 0x80, 0x80, 0xaf};
-    const bool four_accepted = is_valid(slash_in_four, slash_in_four + sizeof(slash_in_four));
-    check (!four_accepted);
-}
-
-// [ 1528369 ] utf8::find_invalid does not return the start of a seqence
-// For every kind of broken input, find_invalid must point at the first byte
-// of the offending sequence rather than somewhere inside it.
-void id_1528369 ()
-{
-    // A three-byte lead whose last continuation byte is missing
-    unsigned char truncated[] = {0xe6, 0x97, 0x0};
-    unsigned char* const first_bad = find_invalid(truncated, truncated + sizeof(truncated));
-    check (first_bad == truncated);
-
-    // A complete three-byte sequence followed by a truncated two-byte one
-    unsigned char good_then_truncated[] = {0xE6, 0x97, 0xA5, 0xd1, 0x0};
-    unsigned char* const second_bad =
-        find_invalid(good_then_truncated, good_then_truncated + sizeof(good_then_truncated));
-    check (second_bad == good_then_truncated + 3);
-
-    // invalid code point (U+DBFF, a surrogate)
-    unsigned char surrogate[] = {0xed, 0xaf, 0xbf};
-    unsigned char* const third_bad = find_invalid(surrogate, surrogate + sizeof(surrogate));
-    check (third_bad == surrogate);
-
-    // overlong sequence (U+002F written with three bytes)
-    unsigned char overlong[] = {0xe0, 0x80, 0xaf};
-    unsigned char* const fourth_bad = find_invalid(overlong, overlong + sizeof(overlong));
-    check (fourth_bad == overlong);
-}
--- a/v1_0/test_drivers/regression_tests/r1_0Beta2/basic_functionality.h
+++ b/v1_0/test_drivers/regression_tests/r1_0Beta2/basic_functionality.h
@ -1,11 +0,0 @@
-#include "../../../source/utf8.h"
-using namespace utf8;
-
-// [ 1531740 ] utf8::append does not work correctly for some code points.
-// Appends U+3044 to a zeroed four-byte buffer and checks that exactly the
-// three bytes e3 81 84 were written, leaving the fourth byte untouched.
-void id_1531740()
-{
-    unsigned char written[4] = {0x0, 0x0, 0x0, 0x0};
-    const unsigned char expected[4] = {0xe3, 0x81, 0x84, 0x0};
-
-    append(0x3044U, written);
-
-    bool matches = true;
-    for (int i = 0; i < 4; ++i)
-        matches = matches && (written[i] == expected[i]);
-    check (matches);
-}
--- a/v1_0/test_drivers/regression_tests/r1_0Beta3/basic_functionality.h
+++ b/v1_0/test_drivers/regression_tests/r1_0Beta3/basic_functionality.h
@ -1,11 +0,0 @@
-#include "../../../source/utf8.h"
-using namespace utf8;
-
-// [ 1538338 ] unchecked::next does not work correctly for 4-byte sequences.
-// Decodes the four-byte encoding of U+10346 with unchecked::next and checks
-// the resulting code point.
-void id_1538338()
-{
-    // A string literal is an array of const char. Binding it to a plain char*
-    // is deprecated in C++03 and ill-formed from C++11 on, so the pointers
-    // are const-qualified.
-    const char* four_bytes = "\xf0\x90\x8d\x86";
-    const char* it = four_bytes;
-    int cp = unchecked::next(it);
-    check (cp == 0x10346);
-}
--- a/v1_0/test_drivers/regression_tests/reg_tests_driver.cpp
+++ b/v1_0/test_drivers/regression_tests/reg_tests_driver.cpp
@ -1,40 +0,0 @@
-#include <iostream>
-using namespace std;
-
-// Reports a failed check on standard output; prints nothing when the
-// condition holds.
-//   condition  - result of the checked expression
-//   file, line - source location of the check, echoed in the report
-inline void check_impl (bool condition, const char* file, int line)
-{
-    if (!condition) {
-        cout << "Check Failed! File: " << file << " Line: " << line << '\n';
-    }
-}
-
-// Expands to a single expression with no trailing semicolon, so "check (x);"
-// is exactly one statement and stays correct inside an unbraced if/else.
-// The argument is parenthesised so that an expression containing a top-level
-// comma is still passed as one argument.
-#define check(c) check_impl((c), __FILE__, __LINE__)
-
-// Release 1.0 Beta 1
-#include "r1_0Beta1/invalidutf8.h"
-#include "r1_0Beta1/basic_functionality.h"
-// Release 1.0 Beta 2
-#include "r1_0Beta2/basic_functionality.h"
-// Release 1.0 Beta 3
-#include "r1_0Beta3/basic_functionality.h"
-
-
-
-// Runs every regression case once, in the order of the release that
-// introduced it.
-int main()
-{
-    typedef void (*regression_case)();
-    const regression_case cases[] = {
-        // Release 1.0 Beta 1: r1_0Beta1/invalidutf8.h
-        id_1524459,
-        id_1525236,
-        id_1528369,
-        // Release 1.0 Beta 1: r1_0Beta1/basic_functionality.h
-        id_1528544,
-        // Release 1.0 Beta 2: r1_0Beta2/basic_functionality.h
-        id_1531740,
-        // Release 1.0 Beta 3: r1_0Beta3/basic_functionality.h
-        id_1538338
-    };
-
-    const unsigned case_count = sizeof(cases) / sizeof(cases[0]);
-    for (unsigned i = 0; i < case_count; ++i)
-        cases[i]();
-}
--- a/v1_0/test_drivers/runtests.pl
+++ b/v1_0/test_drivers/runtests.pl
@ -1,50 +0,0 @@
-#! /usr/bin/perl
-
-# Builds all test drivers, runs them, and collects their output in report.txt
-# with a header and footer line around each section.
-
-$report_name = './report.txt';
-
-# Writes the given lines to the report, opened with $mode ('>' creates the
-# file, '>>' appends). The REPORT handle itself is closed before returning so
-# the text is on disk before a child process appends its own output; closing
-# by file name, as was done before, left the handle open and its buffer
-# unflushed.
-sub write_report {
-    my ($mode, @lines) = @_;
-    open(REPORT, $mode, $report_name) or die "Cannot open $report_name: $!";
-    print REPORT @lines;
-    close(REPORT) or die "Cannot close $report_name: $!";
-}
-
-# Runs each command from inside $dir, appending its standard output to the
-# report, then returns to the starting directory.
-sub run_in_dir {
-    my ($dir, @commands) = @_;
-    chdir $dir or die "Cannot enter $dir: $!";
-    `$_ >> ../$report_name` for @commands;
-    chdir '..' or die "Cannot leave $dir: $!";
-}
-
-# First, build everything
-write_report('>', "==================Make output==================\n");
-`make >> $report_name`;
-write_report('>>',
-    "==================End of Make output==================\n",
-    "\n",
-    "==================Smoke Test ==================\n");
-
-# Now, run individual tests and create the report
-run_in_dir('smoke_test', './smoketest');
-write_report('>>',
-    "==================End of smoke test==================\n",
-    "\n",
-    "==================Regression Test ==================\n");
-
-run_in_dir('regression_tests', './regressiontest');
-write_report('>>',
-    "==================End of regression test==================\n",
-    "\n",
-    "==================Negative Test ==================\n");
-
-run_in_dir('negative', './negative');
-write_report('>>',
-    "==================End of negative test==================\n",
-    "\n",
-    "==================utf8reader runs ==================\n");
-
-run_in_dir('utf8reader',
-    './utf8reader ../../test_data/utf8samples/quickbrown.txt',
-    './utf8reader ../../test_data/utf8samples/Unicode_transcriptions.html',
-    './utf8reader ../../test_data/utf8samples/UTF-8-demo.txt');
-write_report('>>',
-    "==================End of utf8reader runs==================\n",
-    "\n");
--- a/v1_0/test_drivers/smoke_test/Makefile
+++ b/v1_0/test_drivers/smoke_test/Makefile
@ -1,5 +0,0 @@
-CC = g++
-CFLAGS = -g -Wall
-
-# Debug build of the smoke test; rebuilt when the test source or the library
-# header changes.
-smoketest: test.cpp ../../source/utf8.h
-	$(CC) $(CFLAGS) $< -o $@
--- a/v1_0/test_drivers/smoke_test/test.cpp
+++ b/v1_0/test_drivers/smoke_test/test.cpp
@ -1,235 +0,0 @@
-
-#include <cassert>
-#include <cstring>
-#include <iterator>
-#include <vector>
-#include "../../source/utf8.h"
-using namespace utf8;
-using namespace std;
-
-// Smoke test for the utf8 library.
-//
-// Exercises every public function once with small hand-encoded inputs, first
-// through the checked API and then through the utf8::unchecked variants.
-// Failures are reported through assert, so the program must be built without
-// NDEBUG for the test to be meaningful.
-int main()
-{
-    //append
-    unsigned char u[5] = {0,0,0,0,0};
-
-    // append returns the position just past the bytes it wrote.
-    unsigned char* end = append(0x0448, u);
-    assert (end == u + 2);
-    assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0);
-
-    end = append(0x65e5, u);
-    assert (end == u + 3);
-    assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0);
-
-    end = append(0x3044, u);
-    assert (end == u + 3);
-    assert (u[0] == 0xe3 && u[1] == 0x81 && u[2] == 0x84 && u[3] == 0 && u[4] == 0);
-
-    end = append(0x10346, u);
-    assert (end == u + 4);
-    assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0);
-
-    //next
-    // String literals are const; binding them to a non-const char* is ill-formed
-    // in C++11 and later, so the cursors below are pointers to const as well.
-    const char* twochars = "\xe6\x97\xa5\xd1\x88";
-    const char* w = twochars;
-    int cp = next(w, twochars + 6);
-    assert (cp == 0x65e5);
-    assert (w == twochars + 3);
-
-    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
-    w = threechars;
-    cp = next(w, threechars + 9);
-    assert (cp == 0x10346);
-    assert (w == threechars + 4);
-    cp = next(w, threechars + 9);
-    assert (cp == 0x65e5);
-    assert (w == threechars + 7);
-    cp = next(w, threechars + 9);
-    assert (cp == 0x0448);
-    assert (w == threechars + 9);
-
-    //prior
-    w = twochars + 3;
-    cp = prior (w, twochars);
-    assert (cp == 0x65e5);
-    assert (w == twochars);
-
-    w = threechars + 9;
-    cp = prior(w, threechars);
-    assert (cp == 0x0448);
-    assert (w == threechars + 7);
-    cp = prior(w, threechars);
-    assert (cp == 0x65e5);
-    assert (w == threechars + 4);
-    cp = prior(w, threechars);
-    assert (cp == 0x10346);
-    assert (w == threechars);
-
-    //previous (deprecated)
-    // The deprecated interface takes a "one before the start" position, which
-    // is why the calls below pass start - 1.
-    w = twochars + 3;
-    cp = previous (w, twochars - 1);
-    assert (cp == 0x65e5);
-    assert (w == twochars);
-
-    w = threechars + 9;
-    cp = previous(w, threechars - 1);
-    assert (cp == 0x0448);
-    assert (w == threechars + 7);
-    cp = previous(w, threechars -1);
-    assert (cp == 0x65e5);
-    assert (w == threechars + 4);
-    cp = previous(w, threechars - 1);
-    assert (cp == 0x10346);
-    assert (w == threechars);
-
-    // advance
-    w = twochars;
-    advance (w, 2, twochars + 6);
-    assert (w == twochars + 5);
-
-    // distance
-    size_t dist = utf8::distance(twochars, twochars + 5);
-    assert (dist == 2);
-
-    // utf32to8
-    int utf32string[] = {0x448, 0x65E5, 0x10346, 0};
-    vector<char> utf8result;
-    utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
-    assert (utf8result.size() == 9);
-    // try it with the return value;
-    // (the vector already holds 9 bytes, so writing through a raw pointer is in bounds)
-    char* utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]);
-    assert (utf8_end == &utf8result[0] + 9);
-
-    //utf8to32
-    vector<int> utf32result;
-    utf8to32(twochars, twochars + 5, back_inserter(utf32result));
-    assert (utf32result.size() == 2);
-    // try it with the return value;
-    int* utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]);
-    assert (utf32_end == &utf32result[0] + 2);
-
-    //utf16to8
-    unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
-    utf8result.clear();
-    utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
-    assert (utf8result.size() == 10);
-    // try it with the return value;
-    utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]);
-    assert (utf8_end == &utf8result[0] + 10);
-
-    //utf8to16
-    char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
-    vector <unsigned short> utf16result;
-    utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
-    assert (utf16result.size() == 4);
-    assert (utf16result[2] == 0xd834);
-    assert (utf16result[3] == 0xdd1e);
-    // try it with the return value;
-    unsigned short* utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]);
-    assert (utf16_end == &utf16result[0] + 4);
-
-    //find_invalid
-    char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa";
-    char* invalid = find_invalid(utf_invalid, utf_invalid + 6);
-    assert (invalid == utf_invalid + 5);
-
-    //is_valid
-    bool bvalid = is_valid(utf_invalid, utf_invalid + 6);
-    assert (bvalid == false);
-    bvalid = is_valid(utf8_with_surrogates, utf8_with_surrogates + 9);
-    assert (bvalid == true);
-
-    //is_bom
-    unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf};
-    bool bbom = is_bom(byte_order_mark);
-    assert (bbom == true);
-
-    //////////////////////////////////////////////////////////
-    //// Unchecked variants
-    //////////////////////////////////////////////////////////
-
-    //append
-    memset(u, 0, sizeof u);
-    end = unchecked::append(0x0448, u);
-    assert (end == u + 2);
-    assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0);
-
-    end = unchecked::append(0x65e5, u);
-    assert (end == u + 3);
-    assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0);
-
-    end = unchecked::append(0x10346, u);
-    assert (end == u + 4);
-    assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0);
-
-    //next
-    w = twochars;
-    cp = unchecked::next(w);
-    assert (cp == 0x65e5);
-    assert (w == twochars + 3);
-
-    w = threechars;
-    cp = unchecked::next(w);
-    assert (cp == 0x10346);
-    assert (w == threechars + 4);
-    cp = unchecked::next(w);
-    assert (cp == 0x65e5);
-    assert (w == threechars + 7);
-    cp = unchecked::next(w);
-    assert (cp == 0x0448);
-    assert (w == threechars + 9);
-
-
-    //previous (calls prior internally)
-    w = twochars + 3;
-    cp = unchecked::previous (w);
-    assert (cp == 0x65e5);
-    assert (w == twochars);
-
-    w = threechars + 9;
-    cp = unchecked::previous(w);
-    assert (cp == 0x0448);
-    assert (w == threechars + 7);
-    cp = unchecked::previous(w);
-    assert (cp == 0x65e5);
-    assert (w == threechars + 4);
-    cp = unchecked::previous(w);
-    assert (cp == 0x10346);
-    assert (w == threechars);
-
-    // advance
-    w = twochars;
-    unchecked::advance (w, 2);
-    assert (w == twochars + 5);
-
-    // distance
-    dist = unchecked::distance(twochars, twochars + 5);
-    assert (dist == 2);
-
-    // The "return value" checks in this section call the unchecked functions,
-    // so that the unchecked return values are covered and the checked ones
-    // are not simply tested a second time.
-
-    // utf32to8
-    utf8result.clear();
-    unchecked::utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
-    assert (utf8result.size() == 9);
-    // try it with the return value;
-    utf8_end = unchecked::utf32to8(utf32string, utf32string + 3, &utf8result[0]);
-    assert(utf8_end == &utf8result[0] + 9);
-
-    //utf8to32
-    utf32result.clear();
-    unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result));
-    assert (utf32result.size() == 2);
-    // try it with the return value;
-    utf32_end = unchecked::utf8to32(twochars, twochars + 5, &utf32result[0]);
-    assert (utf32_end == &utf32result[0] + 2);
-
-    //utf16to8
-    utf8result.clear();
-    unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
-    assert (utf8result.size() == 10);
-    // try it with the return value;
-    utf8_end = unchecked::utf16to8 (utf16string, utf16string + 5, &utf8result[0]);
-    assert (utf8_end == &utf8result[0] + 10);
-
-    //utf8to16
-    utf16result.clear();
-    unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
-    assert (utf16result.size() == 4);
-    assert (utf16result[2] == 0xd834);
-    assert (utf16result[3] == 0xdd1e);
-    // try it with the return value;
-    utf16_end = unchecked::utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]);
-    assert (utf16_end == &utf16result[0] + 4);
-}
-
-
--- a/v1_0/test_drivers/utf8reader/Makefile
+++ b/v1_0/test_drivers/utf8reader/Makefile
@ -1,5 +0,0 @@
-# Compiler and flags used to build the utf8reader test driver.
-CC = g++
-CFLAGS = -g -Wall -pedantic
-
-# `utf8readertest` stays the default goal for existing callers, but it is only
-# an alias: the file actually produced is `utf8reader`. Tracking the real file
-# lets make skip the build when the binary is already up to date, instead of
-# relinking on every invocation.
-.PHONY: utf8readertest
-utf8readertest: utf8reader
-
-utf8reader: utf8reader.cpp ../../source/utf8.h
-	$(CC) $(CFLAGS) utf8reader.cpp -o utf8reader
--- a/v1_0/test_drivers/utf8reader/utf8reader.cpp
+++ b/v1_0/test_drivers/utf8reader/utf8reader.cpp
@ -1,134 +0,0 @@
-#include "../../source/utf8.h"
-using namespace utf8;
-
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <vector>
-using namespace std;
-
-// Round-trip test driver for the utf8 library.
-//
-// Usage: utf8reader <filename>
-//
-// Reads the given file line by line. For every line, the valid UTF-8 prefix is
-// converted to UTF-16 and UTF-32 and back, and walked forwards and backwards,
-// first with the checked API and then with the utf8::unchecked variants.
-// Every mismatch is reported on standard output together with the line
-// number; a clean run prints nothing. The process returns 0 in all cases,
-// including a wrong argument count or a file that cannot be opened.
-int main(int argc, char** argv)
-{
-    if (argc != 2) {
-        cout << "\nUsage: utfreader filename\n";
-        return 0;
-    }
-    const char* TEST_FILE_PATH = argv[1];
-    // Open the test file
-    ifstream fs8(TEST_FILE_PATH);
-    if (!fs8.is_open()) {
-        cout << "Could not open " << TEST_FILE_PATH << endl;
-        return 0;
-    }
-
-    // Read it line by line. getline stops on end of file and on any stream
-    // failure, so a read error cannot turn into an endless loop.
-    unsigned int line_count = 0;
-    string line;
-    while (getline(fs8, line)) {
-        line_count++;
-        // Play around with each line and convert it to utf16
-        string::iterator line_start = line.begin();
-        string::iterator line_end   = line.end();
-        // From here on only the valid prefix [line_start, line_end) is used.
-        line_end = find_invalid(line_start, line_end);
-        if (line_end != line.end()) 
-            // Report the offset of the first invalid byte from the start of the line.
-            cout << "Line " << line_count << ": Invalid utf-8 at byte " << int(line_end - line_start) << '\n';
-
-        // Convert it to utf-16
-        vector<unsigned short> utf16_line;
-        utf8to16(line_start, line_end, back_inserter(utf16_line));
-
-        // Back to utf-8 and compare it to the original line.
-        string back_to_utf8;
-        utf16to8(utf16_line.begin(), utf16_line.end(), back_inserter(back_to_utf8));
-        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
-            cout << "Line " << line_count << ": Conversion to UTF-16 and back failed" << '\n';
-
-        // Now, convert it to utf-32, back to utf-8 and compare
-        vector <unsigned> utf32_line;
-        utf8to32(line_start, line_end, back_inserter(utf32_line));
-        back_to_utf8.clear();
-        utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
-        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
-            cout << "Line " << line_count << ": Conversion to UTF-32 and back failed" << '\n';
-
-        // Now, iterate and back
-        unsigned char_count = 0;
-        string::iterator it = line_start;
-        while (it != line_end) {
-            next(it, line_end);
-            char_count++;
-        }
-        if (char_count != utf32_line.size())
-            cout << "Line " << line_count << ": Error in iterating with next - wrong number of characters" << '\n';
-
-        string::iterator adv_it = line_start;
-        utf8::advance(adv_it, char_count, line_end);
-        if (adv_it != line_end)
-            cout << "Line " << line_count << ": Error in advance function" << '\n';
-
-        if (string::size_type(utf8::distance(line_start, line_end)) != char_count)
-            cout << "Line " << line_count << ": Error in distance function" << '\n';
-
-        // Walk back from line_end; char_count must return to zero.
-        // line.rend().base() is the same position as line.begin().
-        while (it != line_start) {
-            previous(it, line.rend().base());
-            char_count--;
-        }
-        if (char_count != 0)
-            cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n';
-
-        //======================== Now, the unchecked versions ======================
-        // Convert it to utf-16 and compare to the checked version
-        vector<unsigned short> utf16_line_unchecked;
-        unchecked::utf8to16(line_start, line_end, back_inserter(utf16_line_unchecked));
-
-        if (utf16_line != utf16_line_unchecked)
-            cout << "Line " << line_count << ": Error in unchecked::utf8to16" << '\n';
-
-        // Back to utf-8 and compare it to the original line.
-        back_to_utf8.clear();
-        unchecked::utf16to8(utf16_line_unchecked.begin(), utf16_line_unchecked.end(), back_inserter(back_to_utf8));
-        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
-            cout << "Line " << line_count << ": Unchecked conversion to UTF-16 and back failed" << '\n';
-
-        // Now, convert it to utf-32, back to utf-8 and compare
-        vector <unsigned> utf32_line_unchecked;
-        unchecked::utf8to32(line_start, line_end, back_inserter(utf32_line_unchecked));
-        if (utf32_line != utf32_line_unchecked)
-            cout << "Line " << line_count << ": Error in unchecked::utf8to32" << '\n';
-
-        back_to_utf8.clear();
-        unchecked::utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
-        if (back_to_utf8.compare(string(line_start, line_end)) != 0) 
-            cout << "Line " << line_count << ": Unchecked conversion to UTF-32 and back failed" << '\n';
-
-        // Now, iterate and back
-        char_count = 0;
-        it = line_start;
-        while (it != line_end) {
-            unchecked::next(it);
-            char_count++;
-        }
-        if (char_count != utf32_line.size())
-            cout << "Line " << line_count << ": Error in iterating with unchecked::next - wrong number of characters" << '\n';
-
-        adv_it = line_start;
-        utf8::unchecked::advance(adv_it, char_count);
-        if (adv_it != line_end)
-            cout << "Line " << line_count << ": Error in unchecked::advance function" << '\n';
-
-        if (string::size_type(utf8::unchecked::distance(line_start, line_end)) != char_count)
-            cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n';
-
-        while (it != line_start) {
-            unchecked::previous(it);
-            char_count--;
-        }
-        if (char_count != 0)
-            cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n';
-
-    }
-}